From 97f17497d162afdb82c8704bf097f0fee3724b2e Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 14 Jun 2016 07:50:17 -0700 Subject: Imported Upstream version 16.04 Change-Id: I77eadcd8538a9122e4773cbe55b24033dc451757 Signed-off-by: C.J. Collier --- lib/Makefile | 66 + lib/librte_acl/Makefile | 96 + lib/librte_acl/acl.h | 241 + lib/librte_acl/acl_bld.c | 1598 +++ lib/librte_acl/acl_gen.c | 561 + lib/librte_acl/acl_run.h | 263 + lib/librte_acl/acl_run_avx2.c | 54 + lib/librte_acl/acl_run_avx2.h | 284 + lib/librte_acl/acl_run_neon.c | 46 + lib/librte_acl/acl_run_neon.h | 289 + lib/librte_acl/acl_run_scalar.c | 192 + lib/librte_acl/acl_run_sse.c | 47 + lib/librte_acl/acl_run_sse.h | 357 + lib/librte_acl/acl_vect.h | 116 + lib/librte_acl/rte_acl.c | 393 + lib/librte_acl/rte_acl.h | 388 + lib/librte_acl/rte_acl_osdep.h | 80 + lib/librte_acl/rte_acl_version.map | 19 + lib/librte_acl/tb_mem.c | 108 + lib/librte_acl/tb_mem.h | 76 + lib/librte_cfgfile/Makefile | 57 + lib/librte_cfgfile/rte_cfgfile.c | 374 + lib/librte_cfgfile/rte_cfgfile.h | 239 + lib/librte_cfgfile/rte_cfgfile_version.map | 22 + lib/librte_cmdline/Makefile | 67 + lib/librte_cmdline/cmdline.c | 301 + lib/librte_cmdline/cmdline.h | 115 + lib/librte_cmdline/cmdline_cirbuf.c | 466 + lib/librte_cmdline/cmdline_cirbuf.h | 245 + lib/librte_cmdline/cmdline_parse.c | 563 + lib/librte_cmdline/cmdline_parse.h | 191 + lib/librte_cmdline/cmdline_parse_etheraddr.c | 180 + lib/librte_cmdline/cmdline_parse_etheraddr.h | 96 + lib/librte_cmdline/cmdline_parse_ipaddr.c | 408 + lib/librte_cmdline/cmdline_parse_ipaddr.h | 189 + lib/librte_cmdline/cmdline_parse_num.c | 402 + lib/librte_cmdline/cmdline_parse_num.h | 115 + lib/librte_cmdline/cmdline_parse_portlist.c | 173 + lib/librte_cmdline/cmdline_parse_portlist.h | 103 + lib/librte_cmdline/cmdline_parse_string.c | 253 + lib/librte_cmdline/cmdline_parse_string.h | 112 + lib/librte_cmdline/cmdline_rdline.c | 697 ++ lib/librte_cmdline/cmdline_rdline.h | 255 + lib/librte_cmdline/cmdline_socket.c | 119 + lib/librte_cmdline/cmdline_socket.h | 76 + lib/librte_cmdline/cmdline_vt100.c | 185 + lib/librte_cmdline/cmdline_vt100.h | 153 + lib/librte_cmdline/rte_cmdline_version.map | 78 + lib/librte_compat/Makefile | 40 + lib/librte_compat/rte_compat.h | 105 + lib/librte_cryptodev/Makefile | 62 + lib/librte_cryptodev/rte_crypto.h | 415 + lib/librte_cryptodev/rte_crypto_sym.h | 662 ++ lib/librte_cryptodev/rte_cryptodev.c | 1162 +++ lib/librte_cryptodev/rte_cryptodev.h | 896 ++ lib/librte_cryptodev/rte_cryptodev_pmd.h | 543 + lib/librte_cryptodev/rte_cryptodev_version.map | 34 + lib/librte_distributor/Makefile | 54 + lib/librte_distributor/rte_distributor.c | 487 + lib/librte_distributor/rte_distributor.h | 247 + lib/librte_distributor/rte_distributor_version.map | 15 + lib/librte_eal/Makefile | 38 + lib/librte_eal/bsdapp/Makefile | 38 + lib/librte_eal/bsdapp/contigmem/BSDmakefile | 36 + lib/librte_eal/bsdapp/contigmem/Makefile | 52 + lib/librte_eal/bsdapp/contigmem/contigmem.c | 232 + lib/librte_eal/bsdapp/eal/Makefile | 117 + lib/librte_eal/bsdapp/eal/eal.c | 638 ++ lib/librte_eal/bsdapp/eal/eal_alarm.c | 60 + lib/librte_eal/bsdapp/eal/eal_debug.c | 119 + lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 134 + lib/librte_eal/bsdapp/eal/eal_interrupts.c | 119 + lib/librte_eal/bsdapp/eal/eal_lcore.c | 79 + lib/librte_eal/bsdapp/eal/eal_log.c | 57 + lib/librte_eal/bsdapp/eal/eal_memory.c | 194 + lib/librte_eal/bsdapp/eal/eal_pci.c | 633 ++ lib/librte_eal/bsdapp/eal/eal_thread.c | 201 + lib/librte_eal/bsdapp/eal/eal_timer.c | 94 + .../bsdapp/eal/include/exec-env/rte_dom0_common.h | 107 + .../bsdapp/eal/include/exec-env/rte_interrupts.h | 137 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 153 + lib/librte_eal/bsdapp/nic_uio/BSDmakefile | 36 + lib/librte_eal/bsdapp/nic_uio/Makefile | 52 + lib/librte_eal/bsdapp/nic_uio/nic_uio.c | 335 + lib/librte_eal/common/Makefile | 61 + lib/librte_eal/common/arch/arm/rte_cpuflags.c | 179 + lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 147 + lib/librte_eal/common/arch/tile/rte_cpuflags.c | 47 + lib/librte_eal/common/arch/x86/rte_cpuflags.c | 220 + lib/librte_eal/common/eal_common_cpuflags.c | 76 + lib/librte_eal/common/eal_common_dev.c | 152 + lib/librte_eal/common/eal_common_devargs.c | 176 + lib/librte_eal/common/eal_common_errno.c | 72 + lib/librte_eal/common/eal_common_hexdump.c | 120 + lib/librte_eal/common/eal_common_launch.c | 118 + lib/librte_eal/common/eal_common_lcore.c | 110 + lib/librte_eal/common/eal_common_log.c | 337 + lib/librte_eal/common/eal_common_memory.c | 154 + lib/librte_eal/common/eal_common_memzone.c | 445 + lib/librte_eal/common/eal_common_options.c | 1022 ++ lib/librte_eal/common/eal_common_pci.c | 457 + lib/librte_eal/common/eal_common_pci_uio.c | 222 + lib/librte_eal/common/eal_common_proc.c | 61 + lib/librte_eal/common/eal_common_string_fns.c | 69 + lib/librte_eal/common/eal_common_tailqs.c | 202 + lib/librte_eal/common/eal_common_thread.c | 157 + lib/librte_eal/common/eal_common_timer.c | 86 + lib/librte_eal/common/eal_filesystem.h | 118 + lib/librte_eal/common/eal_hugepages.h | 67 + lib/librte_eal/common/eal_internal_cfg.h | 94 + lib/librte_eal/common/eal_options.h | 100 + lib/librte_eal/common/eal_private.h | 331 + lib/librte_eal/common/eal_thread.h | 100 + .../common/include/arch/arm/rte_atomic.h | 48 + .../common/include/arch/arm/rte_atomic_32.h | 74 + .../common/include/arch/arm/rte_atomic_64.h | 88 + .../common/include/arch/arm/rte_byteorder.h | 107 + .../common/include/arch/arm/rte_cpuflags.h | 42 + .../common/include/arch/arm/rte_cpuflags_32.h | 82 + .../common/include/arch/arm/rte_cpuflags_64.h | 64 + .../common/include/arch/arm/rte_cycles.h | 42 + .../common/include/arch/arm/rte_cycles_32.h | 121 + .../common/include/arch/arm/rte_cycles_64.h | 71 + .../common/include/arch/arm/rte_memcpy.h | 42 + .../common/include/arch/arm/rte_memcpy_32.h | 338 + .../common/include/arch/arm/rte_memcpy_64.h | 93 + .../common/include/arch/arm/rte_prefetch.h | 42 + .../common/include/arch/arm/rte_prefetch_32.h | 67 + .../common/include/arch/arm/rte_prefetch_64.h | 66 + .../common/include/arch/arm/rte_rwlock.h | 40 + .../common/include/arch/arm/rte_spinlock.h | 92 + lib/librte_eal/common/include/arch/arm/rte_vect.h | 83 + .../common/include/arch/ppc_64/rte_atomic.h | 432 + .../common/include/arch/ppc_64/rte_byteorder.h | 149 + .../common/include/arch/ppc_64/rte_cpuflags.h | 88 + .../common/include/arch/ppc_64/rte_cycles.h | 94 + .../common/include/arch/ppc_64/rte_memcpy.h | 225 + .../common/include/arch/ppc_64/rte_prefetch.h | 67 + .../common/include/arch/ppc_64/rte_rwlock.h | 38 + .../common/include/arch/ppc_64/rte_spinlock.h | 114 + .../common/include/arch/tile/rte_atomic.h | 92 + .../common/include/arch/tile/rte_byteorder.h | 91 + .../common/include/arch/tile/rte_cpuflags.h | 53 + .../common/include/arch/tile/rte_cycles.h | 70 + .../common/include/arch/tile/rte_memcpy.h | 93 + .../common/include/arch/tile/rte_prefetch.h | 67 + .../common/include/arch/tile/rte_rwlock.h | 70 + .../common/include/arch/tile/rte_spinlock.h | 92 + .../common/include/arch/x86/rte_atomic.h | 222 + .../common/include/arch/x86/rte_atomic_32.h | 222 + .../common/include/arch/x86/rte_atomic_64.h | 191 + .../common/include/arch/x86/rte_byteorder.h | 125 + .../common/include/arch/x86/rte_byteorder_32.h | 51 + .../common/include/arch/x86/rte_byteorder_64.h | 52 + .../common/include/arch/x86/rte_cpuflags.h | 153 + .../common/include/arch/x86/rte_cycles.h | 121 + .../common/include/arch/x86/rte_memcpy.h | 884 ++ .../common/include/arch/x86/rte_prefetch.h | 67 + lib/librte_eal/common/include/arch/x86/rte_rtm.h | 73 + .../common/include/arch/x86/rte_rwlock.h | 82 + .../common/include/arch/x86/rte_spinlock.h | 201 + lib/librte_eal/common/include/arch/x86/rte_vect.h | 132 + lib/librte_eal/common/include/generic/rte_atomic.h | 945 ++ .../common/include/generic/rte_byteorder.h | 217 + .../common/include/generic/rte_cpuflags.h | 82 + lib/librte_eal/common/include/generic/rte_cycles.h | 205 + lib/librte_eal/common/include/generic/rte_memcpy.h | 144 + .../common/include/generic/rte_prefetch.h | 83 + lib/librte_eal/common/include/generic/rte_rwlock.h | 208 + .../common/include/generic/rte_spinlock.h | 325 + lib/librte_eal/common/include/rte_alarm.h | 106 + .../common/include/rte_branch_prediction.h | 70 + lib/librte_eal/common/include/rte_common.h | 401 + lib/librte_eal/common/include/rte_debug.h | 103 + lib/librte_eal/common/include/rte_dev.h | 192 + lib/librte_eal/common/include/rte_devargs.h | 177 + lib/librte_eal/common/include/rte_eal.h | 259 + lib/librte_eal/common/include/rte_eal_memconfig.h | 100 + lib/librte_eal/common/include/rte_errno.h | 95 + lib/librte_eal/common/include/rte_hexdump.h | 89 + lib/librte_eal/common/include/rte_interrupts.h | 120 + lib/librte_eal/common/include/rte_keepalive.h | 108 + lib/librte_eal/common/include/rte_launch.h | 177 + lib/librte_eal/common/include/rte_lcore.h | 276 + lib/librte_eal/common/include/rte_log.h | 311 + lib/librte_eal/common/include/rte_malloc.h | 342 + lib/librte_eal/common/include/rte_malloc_heap.h | 55 + lib/librte_eal/common/include/rte_memory.h | 263 + lib/librte_eal/common/include/rte_memzone.h | 305 + lib/librte_eal/common/include/rte_pci.h | 598 ++ .../common/include/rte_pci_dev_feature_defs.h | 70 + .../common/include/rte_pci_dev_features.h | 69 + lib/librte_eal/common/include/rte_pci_dev_ids.h | 704 ++ lib/librte_eal/common/include/rte_per_lcore.h | 79 + lib/librte_eal/common/include/rte_random.h | 91 + lib/librte_eal/common/include/rte_string_fns.h | 81 + lib/librte_eal/common/include/rte_tailq.h | 162 + lib/librte_eal/common/include/rte_time.h | 122 + lib/librte_eal/common/include/rte_version.h | 130 + lib/librte_eal/common/include/rte_warnings.h | 84 + lib/librte_eal/common/malloc_elem.c | 344 + lib/librte_eal/common/malloc_elem.h | 192 + lib/librte_eal/common/malloc_heap.c | 236 + lib/librte_eal/common/malloc_heap.h | 70 + lib/librte_eal/common/rte_keepalive.c | 149 + lib/librte_eal/common/rte_malloc.c | 262 + lib/librte_eal/linuxapp/Makefile | 39 + lib/librte_eal/linuxapp/eal/Makefile | 139 + lib/librte_eal/linuxapp/eal/eal.c | 936 ++ lib/librte_eal/linuxapp/eal/eal_alarm.c | 273 + lib/librte_eal/linuxapp/eal/eal_debug.c | 119 + lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 365 + lib/librte_eal/linuxapp/eal/eal_interrupts.c | 1224 +++ lib/librte_eal/linuxapp/eal/eal_ivshmem.c | 955 ++ lib/librte_eal/linuxapp/eal/eal_lcore.c | 110 + lib/librte_eal/linuxapp/eal/eal_log.c | 146 + lib/librte_eal/linuxapp/eal/eal_memory.c | 1559 +++ lib/librte_eal/linuxapp/eal/eal_pci.c | 762 ++ lib/librte_eal/linuxapp/eal/eal_pci_init.h | 127 + lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 491 + lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 1097 ++ lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c | 405 + lib/librte_eal/linuxapp/eal/eal_thread.c | 199 + lib/librte_eal/linuxapp/eal/eal_timer.c | 305 + lib/librte_eal/linuxapp/eal/eal_vfio.h | 67 + lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 369 + .../eal/include/exec-env/rte_dom0_common.h | 108 + .../linuxapp/eal/include/exec-env/rte_interrupts.h | 228 + .../linuxapp/eal/include/exec-env/rte_kni_common.h | 172 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 156 + lib/librte_eal/linuxapp/igb_uio/Makefile | 53 + lib/librte_eal/linuxapp/igb_uio/compat.h | 116 + lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 580 ++ lib/librte_eal/linuxapp/kni/Makefile | 93 + lib/librte_eal/linuxapp/kni/compat.h | 29 + lib/librte_eal/linuxapp/kni/ethtool/README | 100 + lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING | 339 + .../linuxapp/kni/ethtool/igb/e1000_82575.c | 3665 +++++++ .../linuxapp/kni/ethtool/igb/e1000_82575.h | 509 + .../linuxapp/kni/ethtool/igb/e1000_api.c | 1159 +++ .../linuxapp/kni/ethtool/igb/e1000_api.h | 157 + .../linuxapp/kni/ethtool/igb/e1000_defines.h | 1380 +++ lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h | 793 ++ .../linuxapp/kni/ethtool/igb/e1000_i210.c | 909 ++ .../linuxapp/kni/ethtool/igb/e1000_i210.h | 91 + .../linuxapp/kni/ethtool/igb/e1000_mac.c | 2096 ++++ .../linuxapp/kni/ethtool/igb/e1000_mac.h | 80 + .../linuxapp/kni/ethtool/igb/e1000_manage.c | 554 + .../linuxapp/kni/ethtool/igb/e1000_manage.h | 89 + .../linuxapp/kni/ethtool/igb/e1000_mbx.c | 525 + .../linuxapp/kni/ethtool/igb/e1000_mbx.h | 87 + .../linuxapp/kni/ethtool/igb/e1000_nvm.c | 965 ++ .../linuxapp/kni/ethtool/igb/e1000_nvm.h | 75 + .../linuxapp/kni/ethtool/igb/e1000_osdep.h | 136 + .../linuxapp/kni/ethtool/igb/e1000_phy.c | 3405 ++++++ .../linuxapp/kni/ethtool/igb/e1000_phy.h | 256 + .../linuxapp/kni/ethtool/igb/e1000_regs.h | 646 ++ lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h | 859 ++ .../linuxapp/kni/ethtool/igb/igb_debugfs.c | 28 + .../linuxapp/kni/ethtool/igb/igb_ethtool.c | 2857 +++++ .../linuxapp/kni/ethtool/igb/igb_hwmon.c | 260 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 10291 +++++++++++++++++++ .../linuxapp/kni/ethtool/igb/igb_param.c | 847 ++ .../linuxapp/kni/ethtool/igb/igb_procfs.c | 363 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c | 944 ++ .../linuxapp/kni/ethtool/igb/igb_regtest.h | 249 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c | 436 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h | 46 + lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c | 1482 +++ lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 3918 +++++++ .../linuxapp/kni/ethtool/igb/kcompat_ethtool.c | 1171 +++ lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING | 339 + lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h | 925 ++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c | 1296 +++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h | 44 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c | 2313 +++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h | 58 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 1157 +++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h | 168 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 4082 ++++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h | 140 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h | 168 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c | 2901 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h | 91 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c | 2970 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h | 105 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h | 132 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c | 1847 ++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h | 137 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h | 73 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h | 3254 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c | 937 ++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h | 58 + .../linuxapp/kni/ethtool/ixgbe/kcompat.c | 1246 +++ .../linuxapp/kni/ethtool/ixgbe/kcompat.h | 3143 ++++++ lib/librte_eal/linuxapp/kni/kni_dev.h | 149 + lib/librte_eal/linuxapp/kni/kni_ethtool.c | 217 + lib/librte_eal/linuxapp/kni/kni_fifo.h | 108 + lib/librte_eal/linuxapp/kni/kni_misc.c | 688 ++ lib/librte_eal/linuxapp/kni/kni_net.c | 706 ++ lib/librte_eal/linuxapp/kni/kni_vhost.c | 857 ++ lib/librte_eal/linuxapp/xen_dom0/Makefile | 56 + lib/librte_eal/linuxapp/xen_dom0/compat.h | 15 + lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h | 107 + lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c | 780 ++ lib/librte_ether/Makefile | 59 + lib/librte_ether/rte_dev_info.h | 57 + lib/librte_ether/rte_eth_ctrl.h | 846 ++ lib/librte_ether/rte_ethdev.c | 3371 ++++++ lib/librte_ether/rte_ethdev.h | 4286 ++++++++ lib/librte_ether/rte_ether.h | 416 + lib/librte_ether/rte_ether_version.map | 134 + lib/librte_hash/Makefile | 61 + lib/librte_hash/rte_cmp_arm64.h | 114 + lib/librte_hash/rte_cmp_x86.h | 109 + lib/librte_hash/rte_crc_arm64.h | 215 + lib/librte_hash/rte_cuckoo_hash.c | 1323 +++ lib/librte_hash/rte_fbk_hash.c | 231 + lib/librte_hash/rte_fbk_hash.h | 396 + lib/librte_hash/rte_hash.h | 436 + lib/librte_hash/rte_hash_crc.h | 639 ++ lib/librte_hash/rte_hash_version.map | 40 + lib/librte_hash/rte_jhash.h | 410 + lib/librte_hash/rte_thash.h | 250 + lib/librte_ip_frag/Makefile | 59 + lib/librte_ip_frag/ip_frag_common.h | 169 + lib/librte_ip_frag/ip_frag_internal.c | 418 + lib/librte_ip_frag/rte_ip_frag.h | 363 + lib/librte_ip_frag/rte_ip_frag_common.c | 139 + lib/librte_ip_frag/rte_ipfrag_version.map | 13 + lib/librte_ip_frag/rte_ipv4_fragmentation.c | 209 + lib/librte_ip_frag/rte_ipv4_reassembly.c | 184 + lib/librte_ip_frag/rte_ipv6_fragmentation.c | 207 + lib/librte_ip_frag/rte_ipv6_reassembly.c | 225 + lib/librte_ivshmem/Makefile | 52 + lib/librte_ivshmem/rte_ivshmem.c | 905 ++ lib/librte_ivshmem/rte_ivshmem.h | 165 + lib/librte_ivshmem/rte_ivshmem_version.map | 12 + lib/librte_jobstats/Makefile | 53 + lib/librte_jobstats/rte_jobstats.c | 293 + lib/librte_jobstats/rte_jobstats.h | 336 + lib/librte_jobstats/rte_jobstats_version.map | 26 + lib/librte_kni/Makefile | 53 + lib/librte_kni/rte_kni.c | 712 ++ lib/librte_kni/rte_kni.h | 256 + lib/librte_kni/rte_kni_fifo.h | 93 + lib/librte_kni/rte_kni_version.map | 17 + lib/librte_kvargs/Makefile | 55 + lib/librte_kvargs/rte_kvargs.c | 210 + lib/librte_kvargs/rte_kvargs.h | 156 + lib/librte_kvargs/rte_kvargs_version.map | 10 + lib/librte_lpm/Makefile | 59 + lib/librte_lpm/rte_lpm.c | 1919 ++++ lib/librte_lpm/rte_lpm.h | 491 + lib/librte_lpm/rte_lpm6.c | 883 ++ lib/librte_lpm/rte_lpm6.h | 227 + lib/librte_lpm/rte_lpm_neon.h | 153 + lib/librte_lpm/rte_lpm_sse.h | 149 + lib/librte_lpm/rte_lpm_version.map | 36 + lib/librte_mbuf/Makefile | 52 + lib/librte_mbuf/rte_mbuf.c | 288 + lib/librte_mbuf/rte_mbuf.h | 1946 ++++ lib/librte_mbuf/rte_mbuf_version.map | 20 + lib/librte_mempool/Makefile | 53 + lib/librte_mempool/rte_dom0_mempool.c | 133 + lib/librte_mempool/rte_mempool.c | 919 ++ lib/librte_mempool/rte_mempool.h | 1408 +++ lib/librte_mempool/rte_mempool_version.map | 19 + lib/librte_meter/Makefile | 59 + lib/librte_meter/rte_meter.c | 120 + lib/librte_meter/rte_meter.h | 387 + lib/librte_meter/rte_meter_version.map | 12 + lib/librte_net/Makefile | 40 + lib/librte_net/rte_arp.h | 83 + lib/librte_net/rte_icmp.h | 101 + lib/librte_net/rte_ip.h | 413 + lib/librte_net/rte_sctp.h | 99 + lib/librte_net/rte_tcp.h | 104 + lib/librte_net/rte_udp.h | 99 + lib/librte_pipeline/Makefile | 58 + lib/librte_pipeline/rte_pipeline.c | 1649 +++ lib/librte_pipeline/rte_pipeline.h | 875 ++ lib/librte_pipeline/rte_pipeline_version.map | 47 + lib/librte_port/Makefile | 79 + lib/librte_port/rte_port.h | 263 + lib/librte_port/rte_port_ethdev.c | 553 + lib/librte_port/rte_port_ethdev.h | 105 + lib/librte_port/rte_port_frag.c | 305 + lib/librte_port/rte_port_frag.h | 101 + lib/librte_port/rte_port_ras.c | 359 + lib/librte_port/rte_port_ras.h | 90 + lib/librte_port/rte_port_ring.c | 810 ++ lib/librte_port/rte_port_ring.h | 123 + lib/librte_port/rte_port_sched.c | 323 + lib/librte_port/rte_port_sched.h | 82 + lib/librte_port/rte_port_source_sink.c | 667 ++ lib/librte_port/rte_port_source_sink.h | 90 + lib/librte_port/rte_port_version.map | 37 + lib/librte_power/Makefile | 53 + lib/librte_power/channel_commands.h | 74 + lib/librte_power/guest_channel.c | 161 + lib/librte_power/guest_channel.h | 89 + lib/librte_power/rte_power.c | 143 + lib/librte_power/rte_power.h | 243 + lib/librte_power/rte_power_acpi_cpufreq.c | 545 + lib/librte_power/rte_power_acpi_cpufreq.h | 192 + lib/librte_power/rte_power_common.h | 39 + lib/librte_power/rte_power_kvm_vm.c | 135 + lib/librte_power/rte_power_kvm_vm.h | 179 + lib/librte_power/rte_power_version.map | 18 + lib/librte_reorder/Makefile | 54 + lib/librte_reorder/rte_reorder.c | 411 + lib/librte_reorder/rte_reorder.h | 180 + lib/librte_reorder/rte_reorder_version.map | 13 + lib/librte_ring/Makefile | 51 + lib/librte_ring/rte_ring.c | 373 + lib/librte_ring/rte_ring.h | 1261 +++ lib/librte_ring/rte_ring_version.map | 20 + lib/librte_sched/Makefile | 65 + lib/librte_sched/rte_approx.c | 196 + lib/librte_sched/rte_approx.h | 75 + lib/librte_sched/rte_bitmap.h | 560 + lib/librte_sched/rte_reciprocal.c | 72 + lib/librte_sched/rte_reciprocal.h | 39 + lib/librte_sched/rte_red.c | 158 + lib/librte_sched/rte_red.h | 453 + lib/librte_sched/rte_sched.c | 2156 ++++ lib/librte_sched/rte_sched.h | 455 + lib/librte_sched/rte_sched_common.h | 129 + lib/librte_sched/rte_sched_version.map | 31 + lib/librte_table/Makefile | 85 + lib/librte_table/rte_lru.h | 213 + lib/librte_table/rte_table.h | 301 + lib/librte_table/rte_table_acl.c | 835 ++ lib/librte_table/rte_table_acl.h | 95 + lib/librte_table/rte_table_array.c | 237 + lib/librte_table/rte_table_array.h | 76 + lib/librte_table/rte_table_hash.h | 370 + lib/librte_table/rte_table_hash_ext.c | 1159 +++ lib/librte_table/rte_table_hash_key16.c | 1515 +++ lib/librte_table/rte_table_hash_key32.c | 1144 +++ lib/librte_table/rte_table_hash_key8.c | 1471 +++ lib/librte_table/rte_table_hash_lru.c | 1102 ++ lib/librte_table/rte_table_lpm.c | 393 + lib/librte_table/rte_table_lpm.h | 124 + lib/librte_table/rte_table_lpm_ipv6.c | 398 + lib/librte_table/rte_table_lpm_ipv6.h | 122 + lib/librte_table/rte_table_stub.c | 121 + lib/librte_table/rte_table_stub.h | 62 + lib/librte_table/rte_table_version.map | 28 + lib/librte_timer/Makefile | 52 + lib/librte_timer/rte_timer.c | 637 ++ lib/librte_timer/rte_timer.h | 335 + lib/librte_timer/rte_timer_version.map | 15 + lib/librte_vhost/Makefile | 71 + lib/librte_vhost/eventfd_link/Makefile | 41 + lib/librte_vhost/eventfd_link/eventfd_link.c | 277 + lib/librte_vhost/eventfd_link/eventfd_link.h | 94 + lib/librte_vhost/libvirt/qemu-wrap.py | 387 + lib/librte_vhost/rte_vhost_version.map | 22 + lib/librte_vhost/rte_virtio_net.h | 286 + lib/librte_vhost/vhost-net.h | 114 + lib/librte_vhost/vhost_cuse/eventfd_copy.c | 104 + lib/librte_vhost/vhost_cuse/eventfd_copy.h | 45 + lib/librte_vhost/vhost_cuse/vhost-net-cdev.c | 426 + lib/librte_vhost/vhost_cuse/virtio-net-cdev.c | 435 + lib/librte_vhost/vhost_cuse/virtio-net-cdev.h | 48 + lib/librte_vhost/vhost_rxtx.c | 947 ++ lib/librte_vhost/vhost_user/fd_man.c | 289 + lib/librte_vhost/vhost_user/fd_man.h | 67 + lib/librte_vhost/vhost_user/vhost-net-user.c | 531 + lib/librte_vhost/vhost_user/vhost-net-user.h | 117 + lib/librte_vhost/vhost_user/virtio-net-user.c | 446 + lib/librte_vhost/vhost_user/virtio-net-user.h | 64 + lib/librte_vhost/virtio-net.c | 772 ++ lib/librte_vhost/virtio-net.h | 43 + 476 files changed, 185671 insertions(+) create mode 100644 lib/Makefile create mode 100644 lib/librte_acl/Makefile create mode 100644 lib/librte_acl/acl.h create mode 100644 lib/librte_acl/acl_bld.c create mode 100644 lib/librte_acl/acl_gen.c create mode 100644 lib/librte_acl/acl_run.h create mode 100644 lib/librte_acl/acl_run_avx2.c create mode 100644 lib/librte_acl/acl_run_avx2.h create mode 100644 lib/librte_acl/acl_run_neon.c create mode 100644 lib/librte_acl/acl_run_neon.h create mode 100644 lib/librte_acl/acl_run_scalar.c create mode 100644 lib/librte_acl/acl_run_sse.c create mode 100644 lib/librte_acl/acl_run_sse.h create mode 100644 lib/librte_acl/acl_vect.h create mode 100644 lib/librte_acl/rte_acl.c create mode 100644 lib/librte_acl/rte_acl.h create mode 100644 lib/librte_acl/rte_acl_osdep.h create mode 100644 lib/librte_acl/rte_acl_version.map create mode 100644 lib/librte_acl/tb_mem.c create mode 100644 lib/librte_acl/tb_mem.h create mode 100644 lib/librte_cfgfile/Makefile create mode 100644 lib/librte_cfgfile/rte_cfgfile.c create mode 100644 lib/librte_cfgfile/rte_cfgfile.h create mode 100644 lib/librte_cfgfile/rte_cfgfile_version.map create mode 100644 lib/librte_cmdline/Makefile create mode 100644 lib/librte_cmdline/cmdline.c create mode 100644 lib/librte_cmdline/cmdline.h create mode 100644 lib/librte_cmdline/cmdline_cirbuf.c create mode 100644 lib/librte_cmdline/cmdline_cirbuf.h create mode 100644 lib/librte_cmdline/cmdline_parse.c create mode 100644 lib/librte_cmdline/cmdline_parse.h create mode 100644 lib/librte_cmdline/cmdline_parse_etheraddr.c create mode 100644 lib/librte_cmdline/cmdline_parse_etheraddr.h create mode 100644 lib/librte_cmdline/cmdline_parse_ipaddr.c create mode 100644 lib/librte_cmdline/cmdline_parse_ipaddr.h create mode 100644 lib/librte_cmdline/cmdline_parse_num.c create mode 100644 lib/librte_cmdline/cmdline_parse_num.h create mode 100644 lib/librte_cmdline/cmdline_parse_portlist.c create mode 100644 lib/librte_cmdline/cmdline_parse_portlist.h create mode 100644 lib/librte_cmdline/cmdline_parse_string.c create mode 100644 lib/librte_cmdline/cmdline_parse_string.h create mode 100644 lib/librte_cmdline/cmdline_rdline.c create mode 100644 lib/librte_cmdline/cmdline_rdline.h create mode 100644 lib/librte_cmdline/cmdline_socket.c create mode 100644 lib/librte_cmdline/cmdline_socket.h create mode 100644 lib/librte_cmdline/cmdline_vt100.c create mode 100644 lib/librte_cmdline/cmdline_vt100.h create mode 100644 lib/librte_cmdline/rte_cmdline_version.map create mode 100644 lib/librte_compat/Makefile create mode 100644 lib/librte_compat/rte_compat.h create mode 100644 lib/librte_cryptodev/Makefile create mode 100644 lib/librte_cryptodev/rte_crypto.h create mode 100644 lib/librte_cryptodev/rte_crypto_sym.h create mode 100644 lib/librte_cryptodev/rte_cryptodev.c create mode 100644 lib/librte_cryptodev/rte_cryptodev.h create mode 100644 lib/librte_cryptodev/rte_cryptodev_pmd.h create mode 100644 lib/librte_cryptodev/rte_cryptodev_version.map create mode 100644 lib/librte_distributor/Makefile create mode 100644 lib/librte_distributor/rte_distributor.c create mode 100644 lib/librte_distributor/rte_distributor.h create mode 100644 lib/librte_distributor/rte_distributor_version.map create mode 100644 lib/librte_eal/Makefile create mode 100644 lib/librte_eal/bsdapp/Makefile create mode 100644 lib/librte_eal/bsdapp/contigmem/BSDmakefile create mode 100644 lib/librte_eal/bsdapp/contigmem/Makefile create mode 100644 lib/librte_eal/bsdapp/contigmem/contigmem.c create mode 100644 lib/librte_eal/bsdapp/eal/Makefile create mode 100644 lib/librte_eal/bsdapp/eal/eal.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_debug.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_hugepage_info.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_interrupts.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_lcore.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_log.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_memory.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_pci.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_thread.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_timer.c create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h create mode 100644 lib/librte_eal/bsdapp/eal/rte_eal_version.map create mode 100644 lib/librte_eal/bsdapp/nic_uio/BSDmakefile create mode 100644 lib/librte_eal/bsdapp/nic_uio/Makefile create mode 100644 lib/librte_eal/bsdapp/nic_uio/nic_uio.c create mode 100644 lib/librte_eal/common/Makefile create mode 100644 lib/librte_eal/common/arch/arm/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/tile/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/x86/rte_cpuflags.c create mode 100644 lib/librte_eal/common/eal_common_cpuflags.c create mode 100644 lib/librte_eal/common/eal_common_dev.c create mode 100644 lib/librte_eal/common/eal_common_devargs.c create mode 100644 lib/librte_eal/common/eal_common_errno.c create mode 100644 lib/librte_eal/common/eal_common_hexdump.c create mode 100644 lib/librte_eal/common/eal_common_launch.c create mode 100644 lib/librte_eal/common/eal_common_lcore.c create mode 100644 lib/librte_eal/common/eal_common_log.c create mode 100644 lib/librte_eal/common/eal_common_memory.c create mode 100644 lib/librte_eal/common/eal_common_memzone.c create mode 100644 lib/librte_eal/common/eal_common_options.c create mode 100644 lib/librte_eal/common/eal_common_pci.c create mode 100644 lib/librte_eal/common/eal_common_pci_uio.c create mode 100644 lib/librte_eal/common/eal_common_proc.c create mode 100644 lib/librte_eal/common/eal_common_string_fns.c create mode 100644 lib/librte_eal/common/eal_common_tailqs.c create mode 100644 lib/librte_eal/common/eal_common_thread.c create mode 100644 lib/librte_eal/common/eal_common_timer.c create mode 100644 lib/librte_eal/common/eal_filesystem.h create mode 100644 lib/librte_eal/common/eal_hugepages.h create mode 100644 lib/librte_eal/common/eal_internal_cfg.h create mode 100644 lib/librte_eal/common/eal_options.h create mode 100644 lib/librte_eal/common/eal_private.h create mode 100644 lib/librte_eal/common/eal_thread.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_vect.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_32.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_64.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rtm.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_vect.h create mode 100644 lib/librte_eal/common/include/generic/rte_atomic.h create mode 100644 lib/librte_eal/common/include/generic/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/generic/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/generic/rte_cycles.h create mode 100644 lib/librte_eal/common/include/generic/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/generic/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/generic/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/generic/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/rte_alarm.h create mode 100644 lib/librte_eal/common/include/rte_branch_prediction.h create mode 100644 lib/librte_eal/common/include/rte_common.h create mode 100644 lib/librte_eal/common/include/rte_debug.h create mode 100644 lib/librte_eal/common/include/rte_dev.h create mode 100644 lib/librte_eal/common/include/rte_devargs.h create mode 100644 lib/librte_eal/common/include/rte_eal.h create mode 100644 lib/librte_eal/common/include/rte_eal_memconfig.h create mode 100644 lib/librte_eal/common/include/rte_errno.h create mode 100644 lib/librte_eal/common/include/rte_hexdump.h create mode 100644 lib/librte_eal/common/include/rte_interrupts.h create mode 100644 lib/librte_eal/common/include/rte_keepalive.h create mode 100644 lib/librte_eal/common/include/rte_launch.h create mode 100644 lib/librte_eal/common/include/rte_lcore.h create mode 100644 lib/librte_eal/common/include/rte_log.h create mode 100644 lib/librte_eal/common/include/rte_malloc.h create mode 100644 lib/librte_eal/common/include/rte_malloc_heap.h create mode 100644 lib/librte_eal/common/include/rte_memory.h create mode 100644 lib/librte_eal/common/include/rte_memzone.h create mode 100644 lib/librte_eal/common/include/rte_pci.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_feature_defs.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_features.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_ids.h create mode 100644 lib/librte_eal/common/include/rte_per_lcore.h create mode 100644 lib/librte_eal/common/include/rte_random.h create mode 100644 lib/librte_eal/common/include/rte_string_fns.h create mode 100644 lib/librte_eal/common/include/rte_tailq.h create mode 100644 lib/librte_eal/common/include/rte_time.h create mode 100644 lib/librte_eal/common/include/rte_version.h create mode 100644 lib/librte_eal/common/include/rte_warnings.h create mode 100644 lib/librte_eal/common/malloc_elem.c create mode 100644 lib/librte_eal/common/malloc_elem.h create mode 100644 lib/librte_eal/common/malloc_heap.c create mode 100644 lib/librte_eal/common/malloc_heap.h create mode 100644 lib/librte_eal/common/rte_keepalive.c create mode 100644 lib/librte_eal/common/rte_malloc.c create mode 100644 lib/librte_eal/linuxapp/Makefile create mode 100644 lib/librte_eal/linuxapp/eal/Makefile create mode 100644 lib/librte_eal/linuxapp/eal/eal.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_alarm.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_debug.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_interrupts.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_ivshmem.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_lcore.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_log.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_memory.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_init.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_thread.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_timer.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_vfio.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_xen_memory.c create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h create mode 100644 lib/librte_eal/linuxapp/eal/rte_eal_version.map create mode 100644 lib/librte_eal/linuxapp/igb_uio/Makefile create mode 100644 lib/librte_eal/linuxapp/igb_uio/compat.h create mode 100644 lib/librte_eal/linuxapp/igb_uio/igb_uio.c create mode 100644 lib/librte_eal/linuxapp/kni/Makefile create mode 100644 lib/librte_eal/linuxapp/kni/compat.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/README create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_dev.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_fifo.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_misc.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_net.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_vhost.c create mode 100644 lib/librte_eal/linuxapp/xen_dom0/Makefile create mode 100644 lib/librte_eal/linuxapp/xen_dom0/compat.h create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c create mode 100644 lib/librte_ether/Makefile create mode 100644 lib/librte_ether/rte_dev_info.h create mode 100644 lib/librte_ether/rte_eth_ctrl.h create mode 100644 lib/librte_ether/rte_ethdev.c create mode 100644 lib/librte_ether/rte_ethdev.h create mode 100644 lib/librte_ether/rte_ether.h create mode 100644 lib/librte_ether/rte_ether_version.map create mode 100644 lib/librte_hash/Makefile create mode 100644 lib/librte_hash/rte_cmp_arm64.h create mode 100644 lib/librte_hash/rte_cmp_x86.h create mode 100644 lib/librte_hash/rte_crc_arm64.h create mode 100644 lib/librte_hash/rte_cuckoo_hash.c create mode 100644 lib/librte_hash/rte_fbk_hash.c create mode 100644 lib/librte_hash/rte_fbk_hash.h create mode 100644 lib/librte_hash/rte_hash.h create mode 100644 lib/librte_hash/rte_hash_crc.h create mode 100644 lib/librte_hash/rte_hash_version.map create mode 100644 lib/librte_hash/rte_jhash.h create mode 100644 lib/librte_hash/rte_thash.h create mode 100644 lib/librte_ip_frag/Makefile create mode 100644 lib/librte_ip_frag/ip_frag_common.h create mode 100644 lib/librte_ip_frag/ip_frag_internal.c create mode 100644 lib/librte_ip_frag/rte_ip_frag.h create mode 100644 lib/librte_ip_frag/rte_ip_frag_common.c create mode 100644 lib/librte_ip_frag/rte_ipfrag_version.map create mode 100644 lib/librte_ip_frag/rte_ipv4_fragmentation.c create mode 100644 lib/librte_ip_frag/rte_ipv4_reassembly.c create mode 100644 lib/librte_ip_frag/rte_ipv6_fragmentation.c create mode 100644 lib/librte_ip_frag/rte_ipv6_reassembly.c create mode 100644 lib/librte_ivshmem/Makefile create mode 100644 lib/librte_ivshmem/rte_ivshmem.c create mode 100644 lib/librte_ivshmem/rte_ivshmem.h create mode 100644 lib/librte_ivshmem/rte_ivshmem_version.map create mode 100644 lib/librte_jobstats/Makefile create mode 100644 lib/librte_jobstats/rte_jobstats.c create mode 100644 lib/librte_jobstats/rte_jobstats.h create mode 100644 lib/librte_jobstats/rte_jobstats_version.map create mode 100644 lib/librte_kni/Makefile create mode 100644 lib/librte_kni/rte_kni.c create mode 100644 lib/librte_kni/rte_kni.h create mode 100644 lib/librte_kni/rte_kni_fifo.h create mode 100644 lib/librte_kni/rte_kni_version.map create mode 100644 lib/librte_kvargs/Makefile create mode 100644 lib/librte_kvargs/rte_kvargs.c create mode 100644 lib/librte_kvargs/rte_kvargs.h create mode 100644 lib/librte_kvargs/rte_kvargs_version.map create mode 100644 lib/librte_lpm/Makefile create mode 100644 lib/librte_lpm/rte_lpm.c create mode 100644 lib/librte_lpm/rte_lpm.h create mode 100644 lib/librte_lpm/rte_lpm6.c create mode 100644 lib/librte_lpm/rte_lpm6.h create mode 100644 lib/librte_lpm/rte_lpm_neon.h create mode 100644 lib/librte_lpm/rte_lpm_sse.h create mode 100644 lib/librte_lpm/rte_lpm_version.map create mode 100644 lib/librte_mbuf/Makefile create mode 100644 lib/librte_mbuf/rte_mbuf.c create mode 100644 lib/librte_mbuf/rte_mbuf.h create mode 100644 lib/librte_mbuf/rte_mbuf_version.map create mode 100644 lib/librte_mempool/Makefile create mode 100644 lib/librte_mempool/rte_dom0_mempool.c create mode 100644 lib/librte_mempool/rte_mempool.c create mode 100644 lib/librte_mempool/rte_mempool.h create mode 100644 lib/librte_mempool/rte_mempool_version.map create mode 100644 lib/librte_meter/Makefile create mode 100644 lib/librte_meter/rte_meter.c create mode 100644 lib/librte_meter/rte_meter.h create mode 100644 lib/librte_meter/rte_meter_version.map create mode 100644 lib/librte_net/Makefile create mode 100644 lib/librte_net/rte_arp.h create mode 100644 lib/librte_net/rte_icmp.h create mode 100644 lib/librte_net/rte_ip.h create mode 100644 lib/librte_net/rte_sctp.h create mode 100644 lib/librte_net/rte_tcp.h create mode 100644 lib/librte_net/rte_udp.h create mode 100644 lib/librte_pipeline/Makefile create mode 100644 lib/librte_pipeline/rte_pipeline.c create mode 100644 lib/librte_pipeline/rte_pipeline.h create mode 100644 lib/librte_pipeline/rte_pipeline_version.map create mode 100644 lib/librte_port/Makefile create mode 100644 lib/librte_port/rte_port.h create mode 100644 lib/librte_port/rte_port_ethdev.c create mode 100644 lib/librte_port/rte_port_ethdev.h create mode 100644 lib/librte_port/rte_port_frag.c create mode 100644 lib/librte_port/rte_port_frag.h create mode 100644 lib/librte_port/rte_port_ras.c create mode 100644 lib/librte_port/rte_port_ras.h create mode 100644 lib/librte_port/rte_port_ring.c create mode 100644 lib/librte_port/rte_port_ring.h create mode 100644 lib/librte_port/rte_port_sched.c create mode 100644 lib/librte_port/rte_port_sched.h create mode 100644 lib/librte_port/rte_port_source_sink.c create mode 100644 lib/librte_port/rte_port_source_sink.h create mode 100644 lib/librte_port/rte_port_version.map create mode 100644 lib/librte_power/Makefile create mode 100644 lib/librte_power/channel_commands.h create mode 100644 lib/librte_power/guest_channel.c create mode 100644 lib/librte_power/guest_channel.h create mode 100644 lib/librte_power/rte_power.c create mode 100644 lib/librte_power/rte_power.h create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.c create mode 100644 lib/librte_power/rte_power_acpi_cpufreq.h create mode 100644 lib/librte_power/rte_power_common.h create mode 100644 lib/librte_power/rte_power_kvm_vm.c create mode 100644 lib/librte_power/rte_power_kvm_vm.h create mode 100644 lib/librte_power/rte_power_version.map create mode 100644 lib/librte_reorder/Makefile create mode 100644 lib/librte_reorder/rte_reorder.c create mode 100644 lib/librte_reorder/rte_reorder.h create mode 100644 lib/librte_reorder/rte_reorder_version.map create mode 100644 lib/librte_ring/Makefile create mode 100644 lib/librte_ring/rte_ring.c create mode 100644 lib/librte_ring/rte_ring.h create mode 100644 lib/librte_ring/rte_ring_version.map create mode 100644 lib/librte_sched/Makefile create mode 100644 lib/librte_sched/rte_approx.c create mode 100644 lib/librte_sched/rte_approx.h create mode 100644 lib/librte_sched/rte_bitmap.h create mode 100644 lib/librte_sched/rte_reciprocal.c create mode 100644 lib/librte_sched/rte_reciprocal.h create mode 100644 lib/librte_sched/rte_red.c create mode 100644 lib/librte_sched/rte_red.h create mode 100644 lib/librte_sched/rte_sched.c create mode 100644 lib/librte_sched/rte_sched.h create mode 100644 lib/librte_sched/rte_sched_common.h create mode 100644 lib/librte_sched/rte_sched_version.map create mode 100644 lib/librte_table/Makefile create mode 100644 lib/librte_table/rte_lru.h create mode 100644 lib/librte_table/rte_table.h create mode 100644 lib/librte_table/rte_table_acl.c create mode 100644 lib/librte_table/rte_table_acl.h create mode 100644 lib/librte_table/rte_table_array.c create mode 100644 lib/librte_table/rte_table_array.h create mode 100644 lib/librte_table/rte_table_hash.h create mode 100644 lib/librte_table/rte_table_hash_ext.c create mode 100644 lib/librte_table/rte_table_hash_key16.c create mode 100644 lib/librte_table/rte_table_hash_key32.c create mode 100644 lib/librte_table/rte_table_hash_key8.c create mode 100644 lib/librte_table/rte_table_hash_lru.c create mode 100644 lib/librte_table/rte_table_lpm.c create mode 100644 lib/librte_table/rte_table_lpm.h create mode 100644 lib/librte_table/rte_table_lpm_ipv6.c create mode 100644 lib/librte_table/rte_table_lpm_ipv6.h create mode 100644 lib/librte_table/rte_table_stub.c create mode 100644 lib/librte_table/rte_table_stub.h create mode 100644 lib/librte_table/rte_table_version.map create mode 100644 lib/librte_timer/Makefile create mode 100644 lib/librte_timer/rte_timer.c create mode 100644 lib/librte_timer/rte_timer.h create mode 100644 lib/librte_timer/rte_timer_version.map create mode 100644 lib/librte_vhost/Makefile create mode 100644 lib/librte_vhost/eventfd_link/Makefile create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.c create mode 100644 lib/librte_vhost/eventfd_link/eventfd_link.h create mode 100755 lib/librte_vhost/libvirt/qemu-wrap.py create mode 100644 lib/librte_vhost/rte_vhost_version.map create mode 100644 lib/librte_vhost/rte_virtio_net.h create mode 100644 lib/librte_vhost/vhost-net.h create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.c create mode 100644 lib/librte_vhost/vhost_cuse/eventfd_copy.h create mode 100644 lib/librte_vhost/vhost_cuse/vhost-net-cdev.c create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.c create mode 100644 lib/librte_vhost/vhost_cuse/virtio-net-cdev.h create mode 100644 lib/librte_vhost/vhost_rxtx.c create mode 100644 lib/librte_vhost/vhost_user/fd_man.c create mode 100644 lib/librte_vhost/vhost_user/fd_man.h create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.c create mode 100644 lib/librte_vhost/vhost_user/vhost-net-user.h create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.c create mode 100644 lib/librte_vhost/vhost_user/virtio-net-user.h create mode 100644 lib/librte_vhost/virtio-net.c create mode 100644 lib/librte_vhost/virtio-net.h (limited to 'lib') diff --git a/lib/Makefile b/lib/Makefile new file mode 100644 index 00000000..f254dba8 --- /dev/null +++ b/lib/Makefile @@ -0,0 +1,66 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-y += librte_compat +DIRS-$(CONFIG_RTE_LIBRTE_EAL) += librte_eal +DIRS-$(CONFIG_RTE_LIBRTE_RING) += librte_ring +DIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += librte_mempool +DIRS-$(CONFIG_RTE_LIBRTE_MBUF) += librte_mbuf +DIRS-$(CONFIG_RTE_LIBRTE_TIMER) += librte_timer +DIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += librte_cfgfile +DIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += librte_cmdline +DIRS-$(CONFIG_RTE_LIBRTE_ETHER) += librte_ether +DIRS-$(CONFIG_RTE_LIBRTE_CRYPTODEV) += librte_cryptodev +DIRS-$(CONFIG_RTE_LIBRTE_VHOST) += librte_vhost +DIRS-$(CONFIG_RTE_LIBRTE_HASH) += librte_hash +DIRS-$(CONFIG_RTE_LIBRTE_LPM) += librte_lpm +DIRS-$(CONFIG_RTE_LIBRTE_ACL) += librte_acl +DIRS-$(CONFIG_RTE_LIBRTE_NET) += librte_net +DIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += librte_ip_frag +DIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += librte_jobstats +DIRS-$(CONFIG_RTE_LIBRTE_POWER) += librte_power +DIRS-$(CONFIG_RTE_LIBRTE_METER) += librte_meter +DIRS-$(CONFIG_RTE_LIBRTE_SCHED) += librte_sched +DIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += librte_kvargs +DIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += librte_distributor +DIRS-$(CONFIG_RTE_LIBRTE_PORT) += librte_port +DIRS-$(CONFIG_RTE_LIBRTE_TABLE) += librte_table +DIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += librte_pipeline +DIRS-$(CONFIG_RTE_LIBRTE_REORDER) += librte_reorder + +ifeq ($(CONFIG_RTE_EXEC_ENV_LINUXAPP),y) +DIRS-$(CONFIG_RTE_LIBRTE_KNI) += librte_kni +DIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += librte_ivshmem +endif + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_acl/Makefile b/lib/librte_acl/Makefile new file mode 100644 index 00000000..2e394c97 --- /dev/null +++ b/lib/librte_acl/Makefile @@ -0,0 +1,96 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_acl.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_acl_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += tb_mem.c + +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += rte_acl.c +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_bld.c +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_gen.c +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_scalar.c + +ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_neon.c +CFLAGS_acl_run_neon.o += -flax-vector-conversions -Wno-maybe-uninitialized +else +SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_sse.c +#check if flag for SSE4.1 is already on, if not set it up manually + ifeq ($(findstring RTE_MACHINE_CPUFLAG_SSE4_1,$(CFLAGS)),) + CFLAGS_acl_run_sse.o += -msse4.1 + endif +endif + +# +# If the compiler supports AVX2 instructions, +# then add support for AVX2 classify method. +# + +#check if flag for AVX2 is already on, if not set it up manually +ifeq ($(findstring RTE_MACHINE_CPUFLAG_AVX2,$(CFLAGS)),RTE_MACHINE_CPUFLAG_AVX2) + CC_AVX2_SUPPORT=1 +else + CC_AVX2_SUPPORT=\ + $(shell $(CC) -march=core-avx2 -dM -E - &1 | \ + grep -q AVX2 && echo 1) + ifeq ($(CC_AVX2_SUPPORT), 1) + ifeq ($(CC), icc) + CFLAGS_acl_run_avx2.o += -march=core-avx2 + else + CFLAGS_acl_run_avx2.o += -mavx2 + endif + endif +endif + +ifeq ($(CC_AVX2_SUPPORT), 1) + SRCS-$(CONFIG_RTE_LIBRTE_ACL) += acl_run_avx2.c + CFLAGS_rte_acl.o += -DCC_AVX2_SUPPORT +endif + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include := rte_acl_osdep.h +SYMLINK-$(CONFIG_RTE_LIBRTE_ACL)-include += rte_acl.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_ACL) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_acl/acl.h b/lib/librte_acl/acl.h new file mode 100644 index 00000000..09d67841 --- /dev/null +++ b/lib/librte_acl/acl.h @@ -0,0 +1,241 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ACL_H_ +#define _ACL_H_ + +#ifdef __cplusplus +extern"C" { +#endif /* __cplusplus */ + +#define RTE_ACL_QUAD_MAX 5 +#define RTE_ACL_QUAD_SIZE 4 +#define RTE_ACL_QUAD_SINGLE UINT64_C(0x7f7f7f7f00000000) + +#define RTE_ACL_SINGLE_TRIE_SIZE 2000 + +#define RTE_ACL_DFA_MAX UINT8_MAX +#define RTE_ACL_DFA_SIZE (UINT8_MAX + 1) + +#define RTE_ACL_DFA_GR64_SIZE 64 +#define RTE_ACL_DFA_GR64_NUM (RTE_ACL_DFA_SIZE / RTE_ACL_DFA_GR64_SIZE) +#define RTE_ACL_DFA_GR64_BIT \ + (CHAR_BIT * sizeof(uint32_t) / RTE_ACL_DFA_GR64_NUM) + +typedef int bits_t; + +#define RTE_ACL_BIT_SET_SIZE ((UINT8_MAX + 1) / (sizeof(bits_t) * CHAR_BIT)) + +struct rte_acl_bitset { + bits_t bits[RTE_ACL_BIT_SET_SIZE]; +}; + +#define RTE_ACL_NODE_DFA (0 << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_SINGLE (1U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_QRANGE (3U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_MATCH (4U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_TYPE (7U << RTE_ACL_TYPE_SHIFT) +#define RTE_ACL_NODE_UNDEFINED UINT32_MAX + +/* + * ACL RT structure is a set of multibit tries (with stride == 8) + * represented by an array of transitions. The next position is calculated + * based on the current position and the input byte. + * Each transition is 64 bit value with the following format: + * | node_type_specific : 32 | node_type : 3 | node_addr : 29 | + * For all node types except RTE_ACL_NODE_MATCH, node_addr is an index + * to the start of the node in the transtions array. + * Few different node types are used: + * RTE_ACL_NODE_MATCH: + * node_addr value is and index into an array that contains the return value + * and its priority for each category. + * Upper 32 bits of the transition value are not used for that node type. + * RTE_ACL_NODE_QRANGE: + * that node consist of up to 5 transitions. + * Upper 32 bits are interpreted as 4 signed character values which + * are ordered from smallest(INT8_MIN) to largest (INT8_MAX). + * These values define 5 ranges: + * INT8_MIN <= range[0] <= ((int8_t *)&transition)[4] + * ((int8_t *)&transition)[4] < range[1] <= ((int8_t *)&transition)[5] + * ((int8_t *)&transition)[5] < range[2] <= ((int8_t *)&transition)[6] + * ((int8_t *)&transition)[6] < range[3] <= ((int8_t *)&transition)[7] + * ((int8_t *)&transition)[7] < range[4] <= INT8_MAX + * So for input byte value within range[i] i-th transition within that node + * will be used. + * RTE_ACL_NODE_SINGLE: + * always transitions to the same node regardless of the input value. + * RTE_ACL_NODE_DFA: + * that node consits of up to 256 transitions. + * In attempt to conserve space all transitions are divided into 4 consecutive + * groups, by 64 transitions per group: + * group64[i] contains transitions[i * 64, .. i * 64 + 63]. + * Upper 32 bits are interpreted as 4 unsigned character values one per group, + * which contain index to the start of the given group within the node. + * So to calculate transition index within the node for given input byte value: + * input_byte - ((uint8_t *)&transition)[4 + input_byte / 64]. + */ + +/* + * Structure of a node is a set of ptrs and each ptr has a bit map + * of values associated with this transition. + */ +struct rte_acl_ptr_set { + struct rte_acl_bitset values; /* input values associated with ptr */ + struct rte_acl_node *ptr; /* transition to next node */ +}; + +struct rte_acl_classifier_results { + int results[RTE_ACL_MAX_CATEGORIES]; +}; + +struct rte_acl_match_results { + uint32_t results[RTE_ACL_MAX_CATEGORIES]; + int32_t priority[RTE_ACL_MAX_CATEGORIES]; +}; + +struct rte_acl_node { + uint64_t node_index; /* index for this node */ + uint32_t level; /* level 0-n in the trie */ + uint32_t ref_count; /* ref count for this node */ + struct rte_acl_bitset values; + /* set of all values that map to another node + * (union of bits in each transition. + */ + uint32_t num_ptrs; /* number of ptr_set in use */ + uint32_t max_ptrs; /* number of allocated ptr_set */ + uint32_t min_add; /* number of ptr_set per allocation */ + struct rte_acl_ptr_set *ptrs; /* transitions array for this node */ + int32_t match_flag; + int32_t match_index; /* index to match data */ + uint32_t node_type; + int32_t fanout; + /* number of ranges (transitions w/ consecutive bits) */ + int32_t id; + struct rte_acl_match_results *mrt; /* only valid when match_flag != 0 */ + union { + char transitions[RTE_ACL_QUAD_SIZE]; + /* boundaries for ranged node */ + uint8_t dfa_gr64[RTE_ACL_DFA_GR64_NUM]; + }; + struct rte_acl_node *next; + /* free list link or pointer to duplicate node during merge */ + struct rte_acl_node *prev; + /* points to node from which this node was duplicated */ +}; + +/* + * Types of tries used to generate runtime structure(s) + */ +enum { + RTE_ACL_FULL_TRIE = 0, + RTE_ACL_NOSRC_TRIE = 1, + RTE_ACL_NODST_TRIE = 2, + RTE_ACL_NOPORTS_TRIE = 4, + RTE_ACL_NOVLAN_TRIE = 8, + RTE_ACL_UNUSED_TRIE = 0x80000000 +}; + + +/** MAX number of tries per one ACL context.*/ +#define RTE_ACL_MAX_TRIES 8 + +/** Max number of characters in PM name.*/ +#define RTE_ACL_NAMESIZE 32 + + +struct rte_acl_trie { + uint32_t type; + uint32_t count; + uint32_t root_index; + const uint32_t *data_index; + uint32_t num_data_indexes; +}; + +struct rte_acl_bld_trie { + struct rte_acl_node *trie; +}; + +struct rte_acl_ctx { + char name[RTE_ACL_NAMESIZE]; + /** Name of the ACL context. */ + int32_t socket_id; + /** Socket ID to allocate memory from. */ + enum rte_acl_classify_alg alg; + void *rules; + uint32_t max_rules; + uint32_t rule_sz; + uint32_t num_rules; + uint32_t num_categories; + uint32_t num_tries; + uint32_t match_index; + uint64_t no_match; + uint64_t idle; + uint64_t *trans_table; + uint32_t *data_indexes; + struct rte_acl_trie trie[RTE_ACL_MAX_TRIES]; + void *mem; + size_t mem_sz; + struct rte_acl_config config; /* copy of build config. */ +}; + +int rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, + struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, + uint32_t num_categories, uint32_t data_index_sz, size_t max_size); + +typedef int (*rte_acl_classify_t) +(const struct rte_acl_ctx *, const uint8_t **, uint32_t *, uint32_t, uint32_t); + +/* + * Different implementations of ACL classify. + */ +int +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +int +rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories); + +#ifdef __cplusplus +} +#endif /* __cplusplus */ + +#endif /* _ACL_H_ */ diff --git a/lib/librte_acl/acl_bld.c b/lib/librte_acl/acl_bld.c new file mode 100644 index 00000000..9e5ad1c6 --- /dev/null +++ b/lib/librte_acl/acl_bld.c @@ -0,0 +1,1598 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "tb_mem.h" +#include "acl.h" + +#define ACL_POOL_ALIGN 8 +#define ACL_POOL_ALLOC_MIN 0x800000 + +/* number of pointers per alloc */ +#define ACL_PTR_ALLOC 32 + +/* macros for dividing rule sets heuristics */ +#define NODE_MAX 0x4000 +#define NODE_MIN 0x800 + +/* TALLY are statistics per field */ +enum { + TALLY_0 = 0, /* number of rules that are 0% or more wild. */ + TALLY_25, /* number of rules that are 25% or more wild. */ + TALLY_50, + TALLY_75, + TALLY_100, + TALLY_DEACTIVATED, /* deactivated fields (100% wild in all rules). */ + TALLY_DEPTH, + /* number of rules that are 100% wild for this field and higher. */ + TALLY_NUM +}; + +static const uint32_t wild_limits[TALLY_DEACTIVATED] = {0, 25, 50, 75, 100}; + +enum { + ACL_INTERSECT_NONE = 0, + ACL_INTERSECT_A = 1, /* set A is a superset of A and B intersect */ + ACL_INTERSECT_B = 2, /* set B is a superset of A and B intersect */ + ACL_INTERSECT = 4, /* sets A and B intersect */ +}; + +enum { + ACL_PRIORITY_EQUAL = 0, + ACL_PRIORITY_NODE_A = 1, + ACL_PRIORITY_NODE_B = 2, + ACL_PRIORITY_MIXED = 3 +}; + + +struct acl_mem_block { + uint32_t block_size; + void *mem_ptr; +}; + +#define MEM_BLOCK_NUM 16 + +/* Single ACL rule, build representation.*/ +struct rte_acl_build_rule { + struct rte_acl_build_rule *next; + struct rte_acl_config *config; + /**< configuration for each field in the rule. */ + const struct rte_acl_rule *f; + uint32_t *wildness; +}; + +/* Context for build phase */ +struct acl_build_context { + const struct rte_acl_ctx *acx; + struct rte_acl_build_rule *build_rules; + struct rte_acl_config cfg; + int32_t node_max; + int32_t cur_node_max; + uint32_t node; + uint32_t num_nodes; + uint32_t category_mask; + uint32_t num_rules; + uint32_t node_id; + uint32_t src_mask; + uint32_t num_build_rules; + uint32_t num_tries; + struct tb_mem_pool pool; + struct rte_acl_trie tries[RTE_ACL_MAX_TRIES]; + struct rte_acl_bld_trie bld_tries[RTE_ACL_MAX_TRIES]; + uint32_t data_indexes[RTE_ACL_MAX_TRIES][RTE_ACL_MAX_FIELDS]; + + /* memory free lists for nodes and blocks used for node ptrs */ + struct acl_mem_block blocks[MEM_BLOCK_NUM]; + struct rte_acl_node *node_free_list; +}; + +static int acl_merge_trie(struct acl_build_context *context, + struct rte_acl_node *node_a, struct rte_acl_node *node_b, + uint32_t level, struct rte_acl_node **node_c); + +static void +acl_deref_ptr(struct acl_build_context *context, + struct rte_acl_node *node, int index); + +static void * +acl_build_alloc(struct acl_build_context *context, size_t n, size_t s) +{ + uint32_t m; + void *p; + size_t alloc_size = n * s; + + /* + * look for memory in free lists + */ + for (m = 0; m < RTE_DIM(context->blocks); m++) { + if (context->blocks[m].block_size == + alloc_size && context->blocks[m].mem_ptr != NULL) { + p = context->blocks[m].mem_ptr; + context->blocks[m].mem_ptr = *((void **)p); + memset(p, 0, alloc_size); + return p; + } + } + + /* + * return allocation from memory pool + */ + p = tb_alloc(&context->pool, alloc_size); + return p; +} + +/* + * Free memory blocks (kept in context for reuse). + */ +static void +acl_build_free(struct acl_build_context *context, size_t s, void *p) +{ + uint32_t n; + + for (n = 0; n < RTE_DIM(context->blocks); n++) { + if (context->blocks[n].block_size == s) { + *((void **)p) = context->blocks[n].mem_ptr; + context->blocks[n].mem_ptr = p; + return; + } + } + for (n = 0; n < RTE_DIM(context->blocks); n++) { + if (context->blocks[n].block_size == 0) { + context->blocks[n].block_size = s; + *((void **)p) = NULL; + context->blocks[n].mem_ptr = p; + return; + } + } +} + +/* + * Allocate and initialize a new node. + */ +static struct rte_acl_node * +acl_alloc_node(struct acl_build_context *context, int level) +{ + struct rte_acl_node *node; + + if (context->node_free_list != NULL) { + node = context->node_free_list; + context->node_free_list = node->next; + memset(node, 0, sizeof(struct rte_acl_node)); + } else { + node = acl_build_alloc(context, sizeof(struct rte_acl_node), 1); + } + + if (node != NULL) { + node->num_ptrs = 0; + node->level = level; + node->node_type = RTE_ACL_NODE_UNDEFINED; + node->node_index = RTE_ACL_NODE_UNDEFINED; + context->num_nodes++; + node->id = context->node_id++; + } + return node; +} + +/* + * Dereference all nodes to which this node points + */ +static void +acl_free_node(struct acl_build_context *context, + struct rte_acl_node *node) +{ + uint32_t n; + + if (node->prev != NULL) + node->prev->next = NULL; + for (n = 0; n < node->num_ptrs; n++) + acl_deref_ptr(context, node, n); + + /* free mrt if this is a match node */ + if (node->mrt != NULL) { + acl_build_free(context, sizeof(struct rte_acl_match_results), + node->mrt); + node->mrt = NULL; + } + + /* free transitions to other nodes */ + if (node->ptrs != NULL) { + acl_build_free(context, + node->max_ptrs * sizeof(struct rte_acl_ptr_set), + node->ptrs); + node->ptrs = NULL; + } + + /* put it on the free list */ + context->num_nodes--; + node->next = context->node_free_list; + context->node_free_list = node; +} + + +/* + * Include src bitset in dst bitset + */ +static void +acl_include(struct rte_acl_bitset *dst, struct rte_acl_bitset *src, bits_t mask) +{ + uint32_t n; + + for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) + dst->bits[n] = (dst->bits[n] & mask) | src->bits[n]; +} + +/* + * Set dst to bits of src1 that are not in src2 + */ +static int +acl_exclude(struct rte_acl_bitset *dst, + struct rte_acl_bitset *src1, + struct rte_acl_bitset *src2) +{ + uint32_t n; + bits_t all_bits = 0; + + for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) { + dst->bits[n] = src1->bits[n] & ~src2->bits[n]; + all_bits |= dst->bits[n]; + } + return all_bits != 0; +} + +/* + * Add a pointer (ptr) to a node. + */ +static int +acl_add_ptr(struct acl_build_context *context, + struct rte_acl_node *node, + struct rte_acl_node *ptr, + struct rte_acl_bitset *bits) +{ + uint32_t n, num_ptrs; + struct rte_acl_ptr_set *ptrs = NULL; + + /* + * If there's already a pointer to the same node, just add to the bitset + */ + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) { + if (node->ptrs[n].ptr == ptr) { + acl_include(&node->ptrs[n].values, bits, -1); + acl_include(&node->values, bits, -1); + return 0; + } + } + } + + /* if there's no room for another pointer, make room */ + if (node->num_ptrs >= node->max_ptrs) { + /* add room for more pointers */ + num_ptrs = node->max_ptrs + ACL_PTR_ALLOC; + ptrs = acl_build_alloc(context, num_ptrs, sizeof(*ptrs)); + + /* copy current points to new memory allocation */ + if (node->ptrs != NULL) { + memcpy(ptrs, node->ptrs, + node->num_ptrs * sizeof(*ptrs)); + acl_build_free(context, node->max_ptrs * sizeof(*ptrs), + node->ptrs); + } + node->ptrs = ptrs; + node->max_ptrs = num_ptrs; + } + + /* Find available ptr and add a new pointer to this node */ + for (n = node->min_add; n < node->max_ptrs; n++) { + if (node->ptrs[n].ptr == NULL) { + node->ptrs[n].ptr = ptr; + acl_include(&node->ptrs[n].values, bits, 0); + acl_include(&node->values, bits, -1); + if (ptr != NULL) + ptr->ref_count++; + if (node->num_ptrs <= n) + node->num_ptrs = n + 1; + return 0; + } + } + + return 0; +} + +/* + * Add a pointer for a range of values + */ +static int +acl_add_ptr_range(struct acl_build_context *context, + struct rte_acl_node *root, + struct rte_acl_node *node, + uint8_t low, + uint8_t high) +{ + uint32_t n; + struct rte_acl_bitset bitset; + + /* clear the bitset values */ + for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) + bitset.bits[n] = 0; + + /* for each bit in range, add bit to set */ + for (n = 0; n < UINT8_MAX + 1; n++) + if (n >= low && n <= high) + bitset.bits[n / (sizeof(bits_t) * 8)] |= + 1 << (n % (sizeof(bits_t) * 8)); + + return acl_add_ptr(context, root, node, &bitset); +} + +/* + * Generate a bitset from a byte value and mask. + */ +static int +acl_gen_mask(struct rte_acl_bitset *bitset, uint32_t value, uint32_t mask) +{ + int range = 0; + uint32_t n; + + /* clear the bitset values */ + for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) + bitset->bits[n] = 0; + + /* for each bit in value/mask, add bit to set */ + for (n = 0; n < UINT8_MAX + 1; n++) { + if ((n & mask) == value) { + range++; + bitset->bits[n / (sizeof(bits_t) * 8)] |= + 1 << (n % (sizeof(bits_t) * 8)); + } + } + return range; +} + +/* + * Determine how A and B intersect. + * Determine if A and/or B are supersets of the intersection. + */ +static int +acl_intersect_type(const struct rte_acl_bitset *a_bits, + const struct rte_acl_bitset *b_bits, + struct rte_acl_bitset *intersect) +{ + uint32_t n; + bits_t intersect_bits = 0; + bits_t a_superset = 0; + bits_t b_superset = 0; + + /* + * calculate and store intersection and check if A and/or B have + * bits outside the intersection (superset) + */ + for (n = 0; n < RTE_ACL_BIT_SET_SIZE; n++) { + intersect->bits[n] = a_bits->bits[n] & b_bits->bits[n]; + a_superset |= a_bits->bits[n] ^ intersect->bits[n]; + b_superset |= b_bits->bits[n] ^ intersect->bits[n]; + intersect_bits |= intersect->bits[n]; + } + + n = (intersect_bits == 0 ? ACL_INTERSECT_NONE : ACL_INTERSECT) | + (b_superset == 0 ? 0 : ACL_INTERSECT_B) | + (a_superset == 0 ? 0 : ACL_INTERSECT_A); + + return n; +} + +/* + * Duplicate a node + */ +static struct rte_acl_node * +acl_dup_node(struct acl_build_context *context, struct rte_acl_node *node) +{ + uint32_t n; + struct rte_acl_node *next; + + next = acl_alloc_node(context, node->level); + + /* allocate the pointers */ + if (node->num_ptrs > 0) { + next->ptrs = acl_build_alloc(context, + node->max_ptrs, + sizeof(struct rte_acl_ptr_set)); + next->max_ptrs = node->max_ptrs; + } + + /* copy over the pointers */ + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) { + next->ptrs[n].ptr = node->ptrs[n].ptr; + next->ptrs[n].ptr->ref_count++; + acl_include(&next->ptrs[n].values, + &node->ptrs[n].values, -1); + } + } + + next->num_ptrs = node->num_ptrs; + + /* copy over node's match results */ + if (node->match_flag == 0) + next->match_flag = 0; + else { + next->match_flag = -1; + next->mrt = acl_build_alloc(context, 1, sizeof(*next->mrt)); + memcpy(next->mrt, node->mrt, sizeof(*next->mrt)); + } + + /* copy over node's bitset */ + acl_include(&next->values, &node->values, -1); + + node->next = next; + next->prev = node; + + return next; +} + +/* + * Dereference a pointer from a node + */ +static void +acl_deref_ptr(struct acl_build_context *context, + struct rte_acl_node *node, int index) +{ + struct rte_acl_node *ref_node; + + /* De-reference the node at the specified pointer */ + if (node != NULL && node->ptrs[index].ptr != NULL) { + ref_node = node->ptrs[index].ptr; + ref_node->ref_count--; + if (ref_node->ref_count == 0) + acl_free_node(context, ref_node); + } +} + +/* + * acl_exclude rte_acl_bitset from src and copy remaining pointer to dst + */ +static int +acl_copy_ptr(struct acl_build_context *context, + struct rte_acl_node *dst, + struct rte_acl_node *src, + int index, + struct rte_acl_bitset *b_bits) +{ + int rc; + struct rte_acl_bitset bits; + + if (b_bits != NULL) + if (!acl_exclude(&bits, &src->ptrs[index].values, b_bits)) + return 0; + + rc = acl_add_ptr(context, dst, src->ptrs[index].ptr, &bits); + if (rc < 0) + return rc; + return 1; +} + +/* + * Fill in gaps in ptrs list with the ptr at the end of the list + */ +static void +acl_compact_node_ptrs(struct rte_acl_node *node_a) +{ + uint32_t n; + int min_add = node_a->min_add; + + while (node_a->num_ptrs > 0 && + node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL) + node_a->num_ptrs--; + + for (n = min_add; n + 1 < node_a->num_ptrs; n++) { + + /* if this entry is empty */ + if (node_a->ptrs[n].ptr == NULL) { + + /* move the last pointer to this entry */ + acl_include(&node_a->ptrs[n].values, + &node_a->ptrs[node_a->num_ptrs - 1].values, + 0); + node_a->ptrs[n].ptr = + node_a->ptrs[node_a->num_ptrs - 1].ptr; + + /* + * mark the end as empty and adjust the number + * of used pointer enum_tries + */ + node_a->ptrs[node_a->num_ptrs - 1].ptr = NULL; + while (node_a->num_ptrs > 0 && + node_a->ptrs[node_a->num_ptrs - 1].ptr == NULL) + node_a->num_ptrs--; + } + } +} + +static int +acl_resolve_leaf(struct acl_build_context *context, + struct rte_acl_node *node_a, + struct rte_acl_node *node_b, + struct rte_acl_node **node_c) +{ + uint32_t n; + int combined_priority = ACL_PRIORITY_EQUAL; + + for (n = 0; n < context->cfg.num_categories; n++) { + if (node_a->mrt->priority[n] != node_b->mrt->priority[n]) { + combined_priority |= (node_a->mrt->priority[n] > + node_b->mrt->priority[n]) ? + ACL_PRIORITY_NODE_A : ACL_PRIORITY_NODE_B; + } + } + + /* + * if node a is higher or equal priority for all categories, + * then return node_a. + */ + if (combined_priority == ACL_PRIORITY_NODE_A || + combined_priority == ACL_PRIORITY_EQUAL) { + *node_c = node_a; + return 0; + } + + /* + * if node b is higher or equal priority for all categories, + * then return node_b. + */ + if (combined_priority == ACL_PRIORITY_NODE_B) { + *node_c = node_b; + return 0; + } + + /* + * mixed priorities - create a new node with the highest priority + * for each category. + */ + + /* force new duplication. */ + node_a->next = NULL; + + *node_c = acl_dup_node(context, node_a); + for (n = 0; n < context->cfg.num_categories; n++) { + if ((*node_c)->mrt->priority[n] < node_b->mrt->priority[n]) { + (*node_c)->mrt->priority[n] = node_b->mrt->priority[n]; + (*node_c)->mrt->results[n] = node_b->mrt->results[n]; + } + } + return 0; +} + +/* + * Merge nodes A and B together, + * returns a node that is the path for the intersection + * + * If match node (leaf on trie) + * For each category + * return node = highest priority result + * + * Create C as a duplicate of A to point to child intersections + * If any pointers in C intersect with any in B + * For each intersection + * merge children + * remove intersection from C pointer + * add a pointer from C to child intersection node + * Compact the pointers in A and B + * Copy any B pointers that are outside of the intersection to C + * If C has no references to the B trie + * free C and return A + * Else If C has no references to the A trie + * free C and return B + * Else + * return C + */ +static int +acl_merge_trie(struct acl_build_context *context, + struct rte_acl_node *node_a, struct rte_acl_node *node_b, + uint32_t level, struct rte_acl_node **return_c) +{ + uint32_t n, m, ptrs_c, ptrs_b; + uint32_t min_add_c, min_add_b; + int node_intersect_type; + struct rte_acl_bitset node_intersect; + struct rte_acl_node *node_c; + struct rte_acl_node *node_a_next; + int node_b_refs; + int node_a_refs; + + node_c = node_a; + node_a_next = node_a->next; + min_add_c = 0; + min_add_b = 0; + node_a_refs = node_a->num_ptrs; + node_b_refs = 0; + node_intersect_type = 0; + + /* Resolve leaf nodes (matches) */ + if (node_a->match_flag != 0) { + acl_resolve_leaf(context, node_a, node_b, return_c); + return 0; + } + + /* + * Create node C as a copy of node A, and do: C = merge(A,B); + * If node A can be used instead (A==C), then later we'll + * destroy C and return A. + */ + if (level > 0) + node_c = acl_dup_node(context, node_a); + + /* + * If the two node transitions intersect then merge the transitions. + * Check intersection for entire node (all pointers) + */ + node_intersect_type = acl_intersect_type(&node_c->values, + &node_b->values, + &node_intersect); + + if (node_intersect_type & ACL_INTERSECT) { + + min_add_b = node_b->min_add; + node_b->min_add = node_b->num_ptrs; + ptrs_b = node_b->num_ptrs; + + min_add_c = node_c->min_add; + node_c->min_add = node_c->num_ptrs; + ptrs_c = node_c->num_ptrs; + + for (n = 0; n < ptrs_c; n++) { + if (node_c->ptrs[n].ptr == NULL) { + node_a_refs--; + continue; + } + node_c->ptrs[n].ptr->next = NULL; + for (m = 0; m < ptrs_b; m++) { + + struct rte_acl_bitset child_intersect; + int child_intersect_type; + struct rte_acl_node *child_node_c = NULL; + + if (node_b->ptrs[m].ptr == NULL || + node_c->ptrs[n].ptr == + node_b->ptrs[m].ptr) + continue; + + child_intersect_type = acl_intersect_type( + &node_c->ptrs[n].values, + &node_b->ptrs[m].values, + &child_intersect); + + if ((child_intersect_type & ACL_INTERSECT) != + 0) { + if (acl_merge_trie(context, + node_c->ptrs[n].ptr, + node_b->ptrs[m].ptr, + level + 1, + &child_node_c)) + return 1; + + if (child_node_c != NULL && + child_node_c != + node_c->ptrs[n].ptr) { + + node_b_refs++; + + /* + * Added link from C to + * child_C for all transitions + * in the intersection. + */ + acl_add_ptr(context, node_c, + child_node_c, + &child_intersect); + + /* + * inc refs if pointer is not + * to node b. + */ + node_a_refs += (child_node_c != + node_b->ptrs[m].ptr); + + /* + * Remove intersection from C + * pointer. + */ + if (!acl_exclude( + &node_c->ptrs[n].values, + &node_c->ptrs[n].values, + &child_intersect)) { + acl_deref_ptr(context, + node_c, n); + node_c->ptrs[n].ptr = + NULL; + node_a_refs--; + } + } + } + } + } + + /* Compact pointers */ + node_c->min_add = min_add_c; + acl_compact_node_ptrs(node_c); + node_b->min_add = min_add_b; + acl_compact_node_ptrs(node_b); + } + + /* + * Copy pointers outside of the intersection from B to C + */ + if ((node_intersect_type & ACL_INTERSECT_B) != 0) { + node_b_refs++; + for (m = 0; m < node_b->num_ptrs; m++) + if (node_b->ptrs[m].ptr != NULL) + acl_copy_ptr(context, node_c, + node_b, m, &node_intersect); + } + + /* + * Free node C if top of trie is contained in A or B + * if node C is a duplicate of node A && + * node C was not an existing duplicate + */ + if (node_c != node_a && node_c != node_a_next) { + + /* + * if the intersection has no references to the + * B side, then it is contained in A + */ + if (node_b_refs == 0) { + acl_free_node(context, node_c); + node_c = node_a; + } else { + /* + * if the intersection has no references to the + * A side, then it is contained in B. + */ + if (node_a_refs == 0) { + acl_free_node(context, node_c); + node_c = node_b; + } + } + } + + if (return_c != NULL) + *return_c = node_c; + + if (level == 0) + acl_free_node(context, node_b); + + return 0; +} + +/* + * Reset current runtime fields before next build: + * - free allocated RT memory. + * - reset all RT related fields to zero. + */ +static void +acl_build_reset(struct rte_acl_ctx *ctx) +{ + rte_free(ctx->mem); + memset(&ctx->num_categories, 0, + sizeof(*ctx) - offsetof(struct rte_acl_ctx, num_categories)); +} + +static void +acl_gen_range(struct acl_build_context *context, + const uint8_t *hi, const uint8_t *lo, int size, int level, + struct rte_acl_node *root, struct rte_acl_node *end) +{ + struct rte_acl_node *node, *prev; + uint32_t n; + + prev = root; + for (n = size - 1; n > 0; n--) { + node = acl_alloc_node(context, level++); + acl_add_ptr_range(context, prev, node, lo[n], hi[n]); + prev = node; + } + acl_add_ptr_range(context, prev, end, lo[0], hi[0]); +} + +static struct rte_acl_node * +acl_gen_range_trie(struct acl_build_context *context, + const void *min, const void *max, + int size, int level, struct rte_acl_node **pend) +{ + int32_t n; + struct rte_acl_node *root; + const uint8_t *lo = (const uint8_t *)min; + const uint8_t *hi = (const uint8_t *)max; + + *pend = acl_alloc_node(context, level+size); + root = acl_alloc_node(context, level++); + + if (lo[size - 1] == hi[size - 1]) { + acl_gen_range(context, hi, lo, size, level, root, *pend); + } else { + uint8_t limit_lo[64]; + uint8_t limit_hi[64]; + uint8_t hi_ff = UINT8_MAX; + uint8_t lo_00 = 0; + + memset(limit_lo, 0, RTE_DIM(limit_lo)); + memset(limit_hi, UINT8_MAX, RTE_DIM(limit_hi)); + + for (n = size - 2; n >= 0; n--) { + hi_ff = (uint8_t)(hi_ff & hi[n]); + lo_00 = (uint8_t)(lo_00 | lo[n]); + } + + if (hi_ff != UINT8_MAX) { + limit_lo[size - 1] = hi[size - 1]; + acl_gen_range(context, hi, limit_lo, size, level, + root, *pend); + } + + if (lo_00 != 0) { + limit_hi[size - 1] = lo[size - 1]; + acl_gen_range(context, limit_hi, lo, size, level, + root, *pend); + } + + if (hi[size - 1] - lo[size - 1] > 1 || + lo_00 == 0 || + hi_ff == UINT8_MAX) { + limit_lo[size-1] = (uint8_t)(lo[size-1] + (lo_00 != 0)); + limit_hi[size-1] = (uint8_t)(hi[size-1] - + (hi_ff != UINT8_MAX)); + acl_gen_range(context, limit_hi, limit_lo, size, + level, root, *pend); + } + } + return root; +} + +static struct rte_acl_node * +acl_gen_mask_trie(struct acl_build_context *context, + const void *value, const void *mask, + int size, int level, struct rte_acl_node **pend) +{ + int32_t n; + struct rte_acl_node *root; + struct rte_acl_node *node, *prev; + struct rte_acl_bitset bits; + const uint8_t *val = (const uint8_t *)value; + const uint8_t *msk = (const uint8_t *)mask; + + root = acl_alloc_node(context, level++); + prev = root; + + for (n = size - 1; n >= 0; n--) { + node = acl_alloc_node(context, level++); + acl_gen_mask(&bits, val[n] & msk[n], msk[n]); + acl_add_ptr(context, prev, node, &bits); + prev = node; + } + + *pend = prev; + return root; +} + +static struct rte_acl_node * +build_trie(struct acl_build_context *context, struct rte_acl_build_rule *head, + struct rte_acl_build_rule **last, uint32_t *count) +{ + uint32_t n, m; + int field_index, node_count; + struct rte_acl_node *trie; + struct rte_acl_build_rule *prev, *rule; + struct rte_acl_node *end, *merge, *root, *end_prev; + const struct rte_acl_field *fld; + + prev = head; + rule = head; + *last = prev; + + trie = acl_alloc_node(context, 0); + + while (rule != NULL) { + + root = acl_alloc_node(context, 0); + + root->ref_count = 1; + end = root; + + for (n = 0; n < rule->config->num_fields; n++) { + + field_index = rule->config->defs[n].field_index; + fld = rule->f->field + field_index; + end_prev = end; + + /* build a mini-trie for this field */ + switch (rule->config->defs[n].type) { + + case RTE_ACL_FIELD_TYPE_BITMASK: + merge = acl_gen_mask_trie(context, + &fld->value, + &fld->mask_range, + rule->config->defs[n].size, + end->level + 1, + &end); + break; + + case RTE_ACL_FIELD_TYPE_MASK: + { + /* + * set msb for the size of the field and + * all higher bits. + */ + uint64_t mask; + mask = RTE_ACL_MASKLEN_TO_BITMASK( + fld->mask_range.u32, + rule->config->defs[n].size); + + /* gen a mini-trie for this field */ + merge = acl_gen_mask_trie(context, + &fld->value, + (char *)&mask, + rule->config->defs[n].size, + end->level + 1, + &end); + } + break; + + case RTE_ACL_FIELD_TYPE_RANGE: + merge = acl_gen_range_trie(context, + &rule->f->field[field_index].value, + &rule->f->field[field_index].mask_range, + rule->config->defs[n].size, + end->level + 1, + &end); + break; + + default: + RTE_LOG(ERR, ACL, + "Error in rule[%u] type - %hhu\n", + rule->f->data.userdata, + rule->config->defs[n].type); + return NULL; + } + + /* merge this field on to the end of the rule */ + if (acl_merge_trie(context, end_prev, merge, 0, + NULL) != 0) { + return NULL; + } + } + + end->match_flag = ++context->num_build_rules; + + /* + * Setup the results for this rule. + * The result and priority of each category. + */ + if (end->mrt == NULL) + end->mrt = acl_build_alloc(context, 1, + sizeof(*end->mrt)); + + for (m = context->cfg.num_categories; 0 != m--; ) { + if (rule->f->data.category_mask & (1 << m)) { + end->mrt->results[m] = rule->f->data.userdata; + end->mrt->priority[m] = rule->f->data.priority; + } else { + end->mrt->results[m] = 0; + end->mrt->priority[m] = 0; + } + } + + node_count = context->num_nodes; + (*count)++; + + /* merge this rule into the trie */ + if (acl_merge_trie(context, trie, root, 0, NULL)) + return NULL; + + node_count = context->num_nodes - node_count; + if (node_count > context->cur_node_max) { + *last = prev; + return trie; + } + + prev = rule; + rule = rule->next; + } + + *last = NULL; + return trie; +} + +static void +acl_calc_wildness(struct rte_acl_build_rule *head, + const struct rte_acl_config *config) +{ + uint32_t n; + struct rte_acl_build_rule *rule; + + for (rule = head; rule != NULL; rule = rule->next) { + + for (n = 0; n < config->num_fields; n++) { + + double wild = 0; + uint32_t bit_len = CHAR_BIT * config->defs[n].size; + uint64_t msk_val = RTE_LEN2MASK(bit_len, + typeof(msk_val)); + double size = bit_len; + int field_index = config->defs[n].field_index; + const struct rte_acl_field *fld = rule->f->field + + field_index; + + switch (rule->config->defs[n].type) { + case RTE_ACL_FIELD_TYPE_BITMASK: + wild = (size - __builtin_popcountll( + fld->mask_range.u64 & msk_val)) / + size; + break; + + case RTE_ACL_FIELD_TYPE_MASK: + wild = (size - fld->mask_range.u32) / size; + break; + + case RTE_ACL_FIELD_TYPE_RANGE: + wild = (fld->mask_range.u64 & msk_val) - + (fld->value.u64 & msk_val); + wild = wild / msk_val; + break; + } + + rule->wildness[field_index] = (uint32_t)(wild * 100); + } + } +} + +static void +acl_rule_stats(struct rte_acl_build_rule *head, struct rte_acl_config *config) +{ + struct rte_acl_build_rule *rule; + uint32_t n, m, fields_deactivated = 0; + uint32_t start = 0, deactivate = 0; + int tally[RTE_ACL_MAX_LEVELS][TALLY_NUM]; + + memset(tally, 0, sizeof(tally)); + + for (rule = head; rule != NULL; rule = rule->next) { + + for (n = 0; n < config->num_fields; n++) { + uint32_t field_index = config->defs[n].field_index; + + tally[n][TALLY_0]++; + for (m = 1; m < RTE_DIM(wild_limits); m++) { + if (rule->wildness[field_index] >= + wild_limits[m]) + tally[n][m]++; + } + } + + for (n = config->num_fields - 1; n > 0; n--) { + uint32_t field_index = config->defs[n].field_index; + + if (rule->wildness[field_index] == 100) + tally[n][TALLY_DEPTH]++; + else + break; + } + } + + /* + * Look for any field that is always wild and drop it from the config + * Only deactivate if all fields for a given input loop are deactivated. + */ + for (n = 1; n < config->num_fields; n++) { + if (config->defs[n].input_index != + config->defs[n - 1].input_index) { + for (m = start; m < n; m++) + tally[m][TALLY_DEACTIVATED] = deactivate; + fields_deactivated += deactivate; + start = n; + deactivate = 1; + } + + /* if the field is not always completely wild */ + if (tally[n][TALLY_100] != tally[n][TALLY_0]) + deactivate = 0; + } + + for (m = start; m < n; m++) + tally[m][TALLY_DEACTIVATED] = deactivate; + + fields_deactivated += deactivate; + + /* remove deactivated fields */ + if (fields_deactivated) { + uint32_t k, l = 0; + + for (k = 0; k < config->num_fields; k++) { + if (tally[k][TALLY_DEACTIVATED] == 0) { + memmove(&tally[l][0], &tally[k][0], + TALLY_NUM * sizeof(tally[0][0])); + memmove(&config->defs[l++], + &config->defs[k], + sizeof(struct rte_acl_field_def)); + } + } + config->num_fields = l; + } +} + +static int +rule_cmp_wildness(struct rte_acl_build_rule *r1, struct rte_acl_build_rule *r2) +{ + uint32_t n; + + for (n = 1; n < r1->config->num_fields; n++) { + int field_index = r1->config->defs[n].field_index; + + if (r1->wildness[field_index] != r2->wildness[field_index]) + return r1->wildness[field_index] - + r2->wildness[field_index]; + } + return 0; +} + +/* + * Split the rte_acl_build_rule list into two lists. + */ +static void +rule_list_split(struct rte_acl_build_rule *source, + struct rte_acl_build_rule **list_a, + struct rte_acl_build_rule **list_b) +{ + struct rte_acl_build_rule *fast; + struct rte_acl_build_rule *slow; + + if (source == NULL || source->next == NULL) { + /* length < 2 cases */ + *list_a = source; + *list_b = NULL; + } else { + slow = source; + fast = source->next; + /* Advance 'fast' two nodes, and advance 'slow' one node */ + while (fast != NULL) { + fast = fast->next; + if (fast != NULL) { + slow = slow->next; + fast = fast->next; + } + } + /* 'slow' is before the midpoint in the list, so split it in two + at that point. */ + *list_a = source; + *list_b = slow->next; + slow->next = NULL; + } +} + +/* + * Merge two sorted lists. + */ +static struct rte_acl_build_rule * +rule_list_sorted_merge(struct rte_acl_build_rule *a, + struct rte_acl_build_rule *b) +{ + struct rte_acl_build_rule *result = NULL; + struct rte_acl_build_rule **last_next = &result; + + while (1) { + if (a == NULL) { + *last_next = b; + break; + } else if (b == NULL) { + *last_next = a; + break; + } + if (rule_cmp_wildness(a, b) >= 0) { + *last_next = a; + last_next = &a->next; + a = a->next; + } else { + *last_next = b; + last_next = &b->next; + b = b->next; + } + } + return result; +} + +/* + * Sort list of rules based on the rules wildness. + * Use recursive mergesort algorithm. + */ +static struct rte_acl_build_rule * +sort_rules(struct rte_acl_build_rule *head) +{ + struct rte_acl_build_rule *a; + struct rte_acl_build_rule *b; + + /* Base case -- length 0 or 1 */ + if (head == NULL || head->next == NULL) + return head; + + /* Split head into 'a' and 'b' sublists */ + rule_list_split(head, &a, &b); + + /* Recursively sort the sublists */ + a = sort_rules(a); + b = sort_rules(b); + + /* answer = merge the two sorted lists together */ + return rule_list_sorted_merge(a, b); +} + +static uint32_t +acl_build_index(const struct rte_acl_config *config, uint32_t *data_index) +{ + uint32_t n, m; + int32_t last_header; + + m = 0; + last_header = -1; + + for (n = 0; n < config->num_fields; n++) { + if (last_header != config->defs[n].input_index) { + last_header = config->defs[n].input_index; + data_index[m++] = config->defs[n].offset; + } + } + + return m; +} + +static struct rte_acl_build_rule * +build_one_trie(struct acl_build_context *context, + struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES], + uint32_t n, int32_t node_max) +{ + struct rte_acl_build_rule *last; + struct rte_acl_config *config; + + config = rule_sets[n]->config; + + acl_rule_stats(rule_sets[n], config); + rule_sets[n] = sort_rules(rule_sets[n]); + + context->tries[n].type = RTE_ACL_FULL_TRIE; + context->tries[n].count = 0; + + context->tries[n].num_data_indexes = acl_build_index(config, + context->data_indexes[n]); + context->tries[n].data_index = context->data_indexes[n]; + + context->cur_node_max = node_max; + + context->bld_tries[n].trie = build_trie(context, rule_sets[n], + &last, &context->tries[n].count); + + return last; +} + +static int +acl_build_tries(struct acl_build_context *context, + struct rte_acl_build_rule *head) +{ + uint32_t n, num_tries; + struct rte_acl_config *config; + struct rte_acl_build_rule *last; + struct rte_acl_build_rule *rule_sets[RTE_ACL_MAX_TRIES]; + + config = head->config; + rule_sets[0] = head; + + /* initialize tries */ + for (n = 0; n < RTE_DIM(context->tries); n++) { + context->tries[n].type = RTE_ACL_UNUSED_TRIE; + context->bld_tries[n].trie = NULL; + context->tries[n].count = 0; + } + + context->tries[0].type = RTE_ACL_FULL_TRIE; + + /* calc wildness of each field of each rule */ + acl_calc_wildness(head, config); + + for (n = 0;; n = num_tries) { + + num_tries = n + 1; + + last = build_one_trie(context, rule_sets, n, context->node_max); + if (context->bld_tries[n].trie == NULL) { + RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n); + return -ENOMEM; + } + + /* Build of the last trie completed. */ + if (last == NULL) + break; + + if (num_tries == RTE_DIM(context->tries)) { + RTE_LOG(ERR, ACL, + "Exceeded max number of tries: %u\n", + num_tries); + return -ENOMEM; + } + + /* Trie is getting too big, split remaining rule set. */ + rule_sets[num_tries] = last->next; + last->next = NULL; + acl_free_node(context, context->bld_tries[n].trie); + + /* Create a new copy of config for remaining rules. */ + config = acl_build_alloc(context, 1, sizeof(*config)); + memcpy(config, rule_sets[n]->config, sizeof(*config)); + + /* Make remaining rules use new config. */ + for (head = rule_sets[num_tries]; head != NULL; + head = head->next) + head->config = config; + + /* + * Rebuild the trie for the reduced rule-set. + * Don't try to split it any further. + */ + last = build_one_trie(context, rule_sets, n, INT32_MAX); + if (context->bld_tries[n].trie == NULL || last != NULL) { + RTE_LOG(ERR, ACL, "Build of %u-th trie failed\n", n); + return -ENOMEM; + } + + } + + context->num_tries = num_tries; + return 0; +} + +static void +acl_build_log(const struct acl_build_context *ctx) +{ + uint32_t n; + + RTE_LOG(DEBUG, ACL, "Build phase for ACL \"%s\":\n" + "node limit for tree split: %u\n" + "nodes created: %u\n" + "memory consumed: %zu\n", + ctx->acx->name, + ctx->node_max, + ctx->num_nodes, + ctx->pool.alloc); + + for (n = 0; n < RTE_DIM(ctx->tries); n++) { + if (ctx->tries[n].count != 0) + RTE_LOG(DEBUG, ACL, + "trie %u: number of rules: %u, indexes: %u\n", + n, ctx->tries[n].count, + ctx->tries[n].num_data_indexes); + } +} + +static int +acl_build_rules(struct acl_build_context *bcx) +{ + struct rte_acl_build_rule *br, *head; + const struct rte_acl_rule *rule; + uint32_t *wp; + uint32_t fn, i, n, num; + size_t ofs, sz; + + fn = bcx->cfg.num_fields; + n = bcx->acx->num_rules; + ofs = n * sizeof(*br); + sz = ofs + n * fn * sizeof(*wp); + + br = tb_alloc(&bcx->pool, sz); + + wp = (uint32_t *)((uintptr_t)br + ofs); + num = 0; + head = NULL; + + for (i = 0; i != n; i++) { + rule = (const struct rte_acl_rule *) + ((uintptr_t)bcx->acx->rules + bcx->acx->rule_sz * i); + if ((rule->data.category_mask & bcx->category_mask) != 0) { + br[num].next = head; + br[num].config = &bcx->cfg; + br[num].f = rule; + br[num].wildness = wp; + wp += fn; + head = br + num; + num++; + } + } + + bcx->num_rules = num; + bcx->build_rules = head; + + return 0; +} + +/* + * Copy data_indexes for each trie into RT location. + */ +static void +acl_set_data_indexes(struct rte_acl_ctx *ctx) +{ + uint32_t i, n, ofs; + + ofs = 0; + for (i = 0; i != ctx->num_tries; i++) { + n = ctx->trie[i].num_data_indexes; + memcpy(ctx->data_indexes + ofs, ctx->trie[i].data_index, + n * sizeof(ctx->data_indexes[0])); + ctx->trie[i].data_index = ctx->data_indexes + ofs; + ofs += RTE_ACL_MAX_FIELDS; + } +} + +/* + * Internal routine, performs 'build' phase of trie generation: + * - setups build context. + * - analizes given set of rules. + * - builds internal tree(s). + */ +static int +acl_bld(struct acl_build_context *bcx, struct rte_acl_ctx *ctx, + const struct rte_acl_config *cfg, uint32_t node_max) +{ + int32_t rc; + + /* setup build context. */ + memset(bcx, 0, sizeof(*bcx)); + bcx->acx = ctx; + bcx->pool.alignment = ACL_POOL_ALIGN; + bcx->pool.min_alloc = ACL_POOL_ALLOC_MIN; + bcx->cfg = *cfg; + bcx->category_mask = RTE_LEN2MASK(bcx->cfg.num_categories, + typeof(bcx->category_mask)); + bcx->node_max = node_max; + + rc = sigsetjmp(bcx->pool.fail, 0); + + /* build phase runs out of memory. */ + if (rc != 0) { + RTE_LOG(ERR, ACL, + "ACL context: %s, %s() failed with error code: %d\n", + bcx->acx->name, __func__, rc); + return rc; + } + + /* Create a build rules copy. */ + rc = acl_build_rules(bcx); + if (rc != 0) + return rc; + + /* No rules to build for that context+config */ + if (bcx->build_rules == NULL) { + rc = -EINVAL; + } else { + /* build internal trie representation. */ + rc = acl_build_tries(bcx, bcx->build_rules); + } + return rc; +} + +/* + * Check that parameters for acl_build() are valid. + */ +static int +acl_check_bld_param(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg) +{ + static const size_t field_sizes[] = { + sizeof(uint8_t), sizeof(uint16_t), + sizeof(uint32_t), sizeof(uint64_t), + }; + + uint32_t i, j; + + if (ctx == NULL || cfg == NULL || cfg->num_categories == 0 || + cfg->num_categories > RTE_ACL_MAX_CATEGORIES || + cfg->num_fields == 0 || + cfg->num_fields > RTE_ACL_MAX_FIELDS) + return -EINVAL; + + for (i = 0; i != cfg->num_fields; i++) { + if (cfg->defs[i].type > RTE_ACL_FIELD_TYPE_BITMASK) { + RTE_LOG(ERR, ACL, + "ACL context: %s, invalid type: %hhu for %u-th field\n", + ctx->name, cfg->defs[i].type, i); + return -EINVAL; + } + for (j = 0; + j != RTE_DIM(field_sizes) && + cfg->defs[i].size != field_sizes[j]; + j++) + ; + + if (j == RTE_DIM(field_sizes)) { + RTE_LOG(ERR, ACL, + "ACL context: %s, invalid size: %hhu for %u-th field\n", + ctx->name, cfg->defs[i].size, i); + return -EINVAL; + } + } + + return 0; +} + +int +rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg) +{ + int32_t rc; + uint32_t n; + size_t max_size; + struct acl_build_context bcx; + + rc = acl_check_bld_param(ctx, cfg); + if (rc != 0) + return rc; + + acl_build_reset(ctx); + + if (cfg->max_size == 0) { + n = NODE_MIN; + max_size = SIZE_MAX; + } else { + n = NODE_MAX; + max_size = cfg->max_size; + } + + for (rc = -ERANGE; n >= NODE_MIN && rc == -ERANGE; n /= 2) { + + /* perform build phase. */ + rc = acl_bld(&bcx, ctx, cfg, n); + + if (rc == 0) { + /* allocate and fill run-time structures. */ + rc = rte_acl_gen(ctx, bcx.tries, bcx.bld_tries, + bcx.num_tries, bcx.cfg.num_categories, + RTE_ACL_MAX_FIELDS * RTE_DIM(bcx.tries) * + sizeof(ctx->data_indexes[0]), max_size); + if (rc == 0) { + /* set data indexes. */ + acl_set_data_indexes(ctx); + + /* copy in build config. */ + ctx->config = *cfg; + } + } + + acl_build_log(&bcx); + + /* cleanup after build. */ + tb_free_pool(&bcx.pool); + } + + return rc; +} diff --git a/lib/librte_acl/acl_gen.c b/lib/librte_acl/acl_gen.c new file mode 100644 index 00000000..ea557ab9 --- /dev/null +++ b/lib/librte_acl/acl_gen.c @@ -0,0 +1,561 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "acl.h" + +#define QRANGE_MIN ((uint8_t)INT8_MIN) + +#define RTE_ACL_VERIFY(exp) do { \ + if (!(exp)) \ + rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ +} while (0) + +struct acl_node_counters { + int32_t match; + int32_t match_used; + int32_t single; + int32_t quad; + int32_t quad_vectors; + int32_t dfa; + int32_t dfa_gr64; +}; + +struct rte_acl_indices { + int32_t dfa_index; + int32_t quad_index; + int32_t single_index; + int32_t match_index; + int32_t match_start; +}; + +static void +acl_gen_log_stats(const struct rte_acl_ctx *ctx, + const struct acl_node_counters *counts, + const struct rte_acl_indices *indices, + size_t max_size) +{ + RTE_LOG(DEBUG, ACL, "Gen phase for ACL \"%s\":\n" + "runtime memory footprint on socket %d:\n" + "single nodes/bytes used: %d/%zu\n" + "quad nodes/vectors/bytes used: %d/%d/%zu\n" + "DFA nodes/group64/bytes used: %d/%d/%zu\n" + "match nodes/bytes used: %d/%zu\n" + "total: %zu bytes\n" + "max limit: %zu bytes\n", + ctx->name, ctx->socket_id, + counts->single, counts->single * sizeof(uint64_t), + counts->quad, counts->quad_vectors, + (indices->quad_index - indices->dfa_index) * sizeof(uint64_t), + counts->dfa, counts->dfa_gr64, + indices->dfa_index * sizeof(uint64_t), + counts->match, + counts->match * sizeof(struct rte_acl_match_results), + ctx->mem_sz, + max_size); +} + +static uint64_t +acl_dfa_gen_idx(const struct rte_acl_node *node, uint32_t index) +{ + uint64_t idx; + uint32_t i; + + idx = 0; + for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { + RTE_ACL_VERIFY(node->dfa_gr64[i] < RTE_ACL_DFA_GR64_NUM); + RTE_ACL_VERIFY(node->dfa_gr64[i] < node->fanout); + idx |= (i - node->dfa_gr64[i]) << + (6 + RTE_ACL_DFA_GR64_BIT * i); + } + + return idx << (CHAR_BIT * sizeof(index)) | index | node->node_type; +} + +static void +acl_dfa_fill_gr64(const struct rte_acl_node *node, + const uint64_t src[RTE_ACL_DFA_SIZE], uint64_t dst[RTE_ACL_DFA_SIZE]) +{ + uint32_t i; + + for (i = 0; i != RTE_DIM(node->dfa_gr64); i++) { + memcpy(dst + node->dfa_gr64[i] * RTE_ACL_DFA_GR64_SIZE, + src + i * RTE_ACL_DFA_GR64_SIZE, + RTE_ACL_DFA_GR64_SIZE * sizeof(dst[0])); + } +} + +static uint32_t +acl_dfa_count_gr64(const uint64_t array_ptr[RTE_ACL_DFA_SIZE], + uint8_t gr64[RTE_ACL_DFA_GR64_NUM]) +{ + uint32_t i, j, k; + + k = 0; + for (i = 0; i != RTE_ACL_DFA_GR64_NUM; i++) { + gr64[i] = i; + for (j = 0; j != i; j++) { + if (memcmp(array_ptr + i * RTE_ACL_DFA_GR64_SIZE, + array_ptr + j * RTE_ACL_DFA_GR64_SIZE, + RTE_ACL_DFA_GR64_SIZE * + sizeof(array_ptr[0])) == 0) + break; + } + gr64[i] = (j != i) ? gr64[j] : k++; + } + + return k; +} + +static uint32_t +acl_node_fill_dfa(const struct rte_acl_node *node, + uint64_t dfa[RTE_ACL_DFA_SIZE], uint64_t no_match, int32_t resolved) +{ + uint32_t n, x; + uint32_t ranges, last_bit; + struct rte_acl_node *child; + struct rte_acl_bitset *bits; + + ranges = 0; + last_bit = 0; + + for (n = 0; n < RTE_ACL_DFA_SIZE; n++) + dfa[n] = no_match; + + for (x = 0; x < node->num_ptrs; x++) { + + child = node->ptrs[x].ptr; + if (child == NULL) + continue; + + bits = &node->ptrs[x].values; + for (n = 0; n < RTE_ACL_DFA_SIZE; n++) { + + if (bits->bits[n / (sizeof(bits_t) * CHAR_BIT)] & + (1 << (n % (sizeof(bits_t) * CHAR_BIT)))) { + + dfa[n] = resolved ? child->node_index : x; + ranges += (last_bit == 0); + last_bit = 1; + } else { + last_bit = 0; + } + } + } + + return ranges; +} + +/* +* Counts the number of groups of sequential bits that are +* either 0 or 1, as specified by the zero_one parameter. This is used to +* calculate the number of ranges in a node to see if it fits in a quad range +* node. +*/ +static int +acl_count_sequential_groups(struct rte_acl_bitset *bits, int zero_one) +{ + int n, ranges, last_bit; + + ranges = 0; + last_bit = zero_one ^ 1; + + for (n = QRANGE_MIN; n < UINT8_MAX + 1; n++) { + if (bits->bits[n / (sizeof(bits_t) * 8)] & + (1 << (n % (sizeof(bits_t) * 8)))) { + if (zero_one == 1 && last_bit != 1) + ranges++; + last_bit = 1; + } else { + if (zero_one == 0 && last_bit != 0) + ranges++; + last_bit = 0; + } + } + for (n = 0; n < QRANGE_MIN; n++) { + if (bits->bits[n / (sizeof(bits_t) * 8)] & + (1 << (n % (sizeof(bits_t) * 8)))) { + if (zero_one == 1 && last_bit != 1) + ranges++; + last_bit = 1; + } else { + if (zero_one == 0 && last_bit != 0) + ranges++; + last_bit = 0; + } + } + + return ranges; +} + +/* + * Count number of ranges spanned by the node's pointers + */ +static int +acl_count_fanout(struct rte_acl_node *node) +{ + uint32_t n; + int ranges; + + if (node->fanout != 0) + return node->fanout; + + ranges = acl_count_sequential_groups(&node->values, 0); + + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) + ranges += acl_count_sequential_groups( + &node->ptrs[n].values, 1); + } + + node->fanout = ranges; + return node->fanout; +} + +/* + * Determine the type of nodes and count each type + */ +static void +acl_count_trie_types(struct acl_node_counters *counts, + struct rte_acl_node *node, uint64_t no_match, int force_dfa) +{ + uint32_t n; + int num_ptrs; + uint64_t dfa[RTE_ACL_DFA_SIZE]; + + /* skip if this node has been counted */ + if (node->node_type != (uint32_t)RTE_ACL_NODE_UNDEFINED) + return; + + if (node->match_flag != 0 || node->num_ptrs == 0) { + counts->match++; + node->node_type = RTE_ACL_NODE_MATCH; + return; + } + + num_ptrs = acl_count_fanout(node); + + /* Force type to dfa */ + if (force_dfa) + num_ptrs = RTE_ACL_DFA_SIZE; + + /* determine node type based on number of ranges */ + if (num_ptrs == 1) { + counts->single++; + node->node_type = RTE_ACL_NODE_SINGLE; + } else if (num_ptrs <= RTE_ACL_QUAD_MAX) { + counts->quad++; + counts->quad_vectors += node->fanout; + node->node_type = RTE_ACL_NODE_QRANGE; + } else { + counts->dfa++; + node->node_type = RTE_ACL_NODE_DFA; + if (force_dfa != 0) { + /* always expand to a max number of nodes. */ + for (n = 0; n != RTE_DIM(node->dfa_gr64); n++) + node->dfa_gr64[n] = n; + node->fanout = n; + } else { + acl_node_fill_dfa(node, dfa, no_match, 0); + node->fanout = acl_dfa_count_gr64(dfa, node->dfa_gr64); + } + counts->dfa_gr64 += node->fanout; + } + + /* + * recursively count the types of all children + */ + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) + acl_count_trie_types(counts, node->ptrs[n].ptr, + no_match, 0); + } +} + +static void +acl_add_ptrs(struct rte_acl_node *node, uint64_t *node_array, uint64_t no_match, + int resolved) +{ + uint32_t x; + int32_t m; + uint64_t *node_a, index, dfa[RTE_ACL_DFA_SIZE]; + + acl_node_fill_dfa(node, dfa, no_match, resolved); + + /* + * Rather than going from 0 to 256, the range count and + * the layout are from 80-ff then 0-7f due to signed compare + * for SSE (cmpgt). + */ + if (node->node_type == RTE_ACL_NODE_QRANGE) { + + m = 0; + node_a = node_array; + index = dfa[QRANGE_MIN]; + *node_a++ = index; + + for (x = QRANGE_MIN + 1; x < UINT8_MAX + 1; x++) { + if (dfa[x] != index) { + index = dfa[x]; + *node_a++ = index; + node->transitions[m++] = (uint8_t)(x - 1); + } + } + + for (x = 0; x < INT8_MAX + 1; x++) { + if (dfa[x] != index) { + index = dfa[x]; + *node_a++ = index; + node->transitions[m++] = (uint8_t)(x - 1); + } + } + + /* fill unused locations with max value - nothing is greater */ + for (; m < RTE_ACL_QUAD_SIZE; m++) + node->transitions[m] = INT8_MAX; + + RTE_ACL_VERIFY(m <= RTE_ACL_QUAD_SIZE); + + } else if (node->node_type == RTE_ACL_NODE_DFA && resolved) { + acl_dfa_fill_gr64(node, dfa, node_array); + } +} + +/* + * Routine that allocates space for this node and recursively calls + * to allocate space for each child. Once all the children are allocated, + * then resolve all transitions for this node. + */ +static void +acl_gen_node(struct rte_acl_node *node, uint64_t *node_array, + uint64_t no_match, struct rte_acl_indices *index, int num_categories) +{ + uint32_t n, sz, *qtrp; + uint64_t *array_ptr; + struct rte_acl_match_results *match; + + if (node->node_index != RTE_ACL_NODE_UNDEFINED) + return; + + array_ptr = NULL; + + switch (node->node_type) { + case RTE_ACL_NODE_DFA: + array_ptr = &node_array[index->dfa_index]; + node->node_index = acl_dfa_gen_idx(node, index->dfa_index); + sz = node->fanout * RTE_ACL_DFA_GR64_SIZE; + index->dfa_index += sz; + for (n = 0; n < sz; n++) + array_ptr[n] = no_match; + break; + case RTE_ACL_NODE_SINGLE: + node->node_index = RTE_ACL_QUAD_SINGLE | index->single_index | + node->node_type; + array_ptr = &node_array[index->single_index]; + index->single_index += 1; + array_ptr[0] = no_match; + break; + case RTE_ACL_NODE_QRANGE: + array_ptr = &node_array[index->quad_index]; + acl_add_ptrs(node, array_ptr, no_match, 0); + qtrp = (uint32_t *)node->transitions; + node->node_index = qtrp[0]; + node->node_index <<= sizeof(index->quad_index) * CHAR_BIT; + node->node_index |= index->quad_index | node->node_type; + index->quad_index += node->fanout; + break; + case RTE_ACL_NODE_MATCH: + match = ((struct rte_acl_match_results *) + (node_array + index->match_start)); + for (n = 0; n != RTE_DIM(match->results); n++) + RTE_ACL_VERIFY(match->results[0] == 0); + memcpy(match + index->match_index, node->mrt, + sizeof(*node->mrt)); + node->node_index = index->match_index | node->node_type; + index->match_index += 1; + break; + case RTE_ACL_NODE_UNDEFINED: + RTE_ACL_VERIFY(node->node_type != + (uint32_t)RTE_ACL_NODE_UNDEFINED); + break; + } + + /* recursively allocate space for all children */ + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) + acl_gen_node(node->ptrs[n].ptr, + node_array, + no_match, + index, + num_categories); + } + + /* All children are resolved, resolve this node's pointers */ + switch (node->node_type) { + case RTE_ACL_NODE_DFA: + acl_add_ptrs(node, array_ptr, no_match, 1); + break; + case RTE_ACL_NODE_SINGLE: + for (n = 0; n < node->num_ptrs; n++) { + if (node->ptrs[n].ptr != NULL) + array_ptr[0] = node->ptrs[n].ptr->node_index; + } + break; + case RTE_ACL_NODE_QRANGE: + acl_add_ptrs(node, array_ptr, no_match, 1); + break; + case RTE_ACL_NODE_MATCH: + break; + case RTE_ACL_NODE_UNDEFINED: + RTE_ACL_VERIFY(node->node_type != + (uint32_t)RTE_ACL_NODE_UNDEFINED); + break; + } +} + +static void +acl_calc_counts_indices(struct acl_node_counters *counts, + struct rte_acl_indices *indices, + struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, + uint64_t no_match) +{ + uint32_t n; + + memset(indices, 0, sizeof(*indices)); + memset(counts, 0, sizeof(*counts)); + + /* Get stats on nodes */ + for (n = 0; n < num_tries; n++) { + acl_count_trie_types(counts, node_bld_trie[n].trie, + no_match, 1); + } + + indices->dfa_index = RTE_ACL_DFA_SIZE + 1; + indices->quad_index = indices->dfa_index + + counts->dfa_gr64 * RTE_ACL_DFA_GR64_SIZE; + indices->single_index = indices->quad_index + counts->quad_vectors; + indices->match_start = indices->single_index + counts->single + 1; + indices->match_start = RTE_ALIGN(indices->match_start, + (XMM_SIZE / sizeof(uint64_t))); + indices->match_index = 1; +} + +/* + * Generate the runtime structure using build structure + */ +int +rte_acl_gen(struct rte_acl_ctx *ctx, struct rte_acl_trie *trie, + struct rte_acl_bld_trie *node_bld_trie, uint32_t num_tries, + uint32_t num_categories, uint32_t data_index_sz, size_t max_size) +{ + void *mem; + size_t total_size; + uint64_t *node_array, no_match; + uint32_t n, match_index; + struct rte_acl_match_results *match; + struct acl_node_counters counts; + struct rte_acl_indices indices; + + no_match = RTE_ACL_NODE_MATCH; + + /* Fill counts and indices arrays from the nodes. */ + acl_calc_counts_indices(&counts, &indices, + node_bld_trie, num_tries, no_match); + + /* Allocate runtime memory (align to cache boundary) */ + total_size = RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE) + + indices.match_start * sizeof(uint64_t) + + (counts.match + 1) * sizeof(struct rte_acl_match_results) + + XMM_SIZE; + + if (total_size > max_size) { + RTE_LOG(DEBUG, ACL, + "Gen phase for ACL ctx \"%s\" exceeds max_size limit, " + "bytes required: %zu, allowed: %zu\n", + ctx->name, total_size, max_size); + return -ERANGE; + } + + mem = rte_zmalloc_socket(ctx->name, total_size, RTE_CACHE_LINE_SIZE, + ctx->socket_id); + if (mem == NULL) { + RTE_LOG(ERR, ACL, + "allocation of %zu bytes on socket %d for %s failed\n", + total_size, ctx->socket_id, ctx->name); + return -ENOMEM; + } + + /* Fill the runtime structure */ + match_index = indices.match_start; + node_array = (uint64_t *)((uintptr_t)mem + + RTE_ALIGN(data_index_sz, RTE_CACHE_LINE_SIZE)); + + /* + * Setup the NOMATCH node (a SINGLE at the + * highest index, that points to itself) + */ + + node_array[RTE_ACL_DFA_SIZE] = RTE_ACL_DFA_SIZE | RTE_ACL_NODE_SINGLE; + + for (n = 0; n < RTE_ACL_DFA_SIZE; n++) + node_array[n] = no_match; + + /* NOMATCH result at index 0 */ + match = ((struct rte_acl_match_results *)(node_array + match_index)); + memset(match, 0, sizeof(*match)); + + for (n = 0; n < num_tries; n++) { + + acl_gen_node(node_bld_trie[n].trie, node_array, no_match, + &indices, num_categories); + + if (node_bld_trie[n].trie->node_index == no_match) + trie[n].root_index = 0; + else + trie[n].root_index = node_bld_trie[n].trie->node_index; + } + + ctx->mem = mem; + ctx->mem_sz = total_size; + ctx->data_indexes = mem; + ctx->num_tries = num_tries; + ctx->num_categories = num_categories; + ctx->match_index = match_index; + ctx->no_match = no_match; + ctx->idle = node_array[RTE_ACL_DFA_SIZE]; + ctx->trans_table = node_array; + memcpy(ctx->trie, trie, sizeof(ctx->trie)); + + acl_gen_log_stats(ctx, &counts, &indices, max_size); + return 0; +} diff --git a/lib/librte_acl/acl_run.h b/lib/librte_acl/acl_run.h new file mode 100644 index 00000000..b2fc42c6 --- /dev/null +++ b/lib/librte_acl/acl_run.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _ACL_RUN_H_ +#define _ACL_RUN_H_ + +#include +#include "acl.h" + +#define MAX_SEARCHES_AVX16 16 +#define MAX_SEARCHES_SSE8 8 +#define MAX_SEARCHES_SSE4 4 +#define MAX_SEARCHES_SCALAR 2 + +#define GET_NEXT_4BYTES(prm, idx) \ + (*((const int32_t *)((prm)[(idx)].data + *(prm)[idx].data_index++))) + + +#define RTE_ACL_NODE_INDEX ((uint32_t)~RTE_ACL_NODE_TYPE) + +#define SCALAR_QRANGE_MULT 0x01010101 +#define SCALAR_QRANGE_MASK 0x7f7f7f7f +#define SCALAR_QRANGE_MIN 0x80808080 + +/* + * Structure to manage N parallel trie traversals. + * The runtime trie traversal routines can process 8, 4, or 2 tries + * in parallel. Each packet may require multiple trie traversals (up to 4). + * This structure is used to fill the slots (0 to n-1) for parallel processing + * with the trie traversals needed for each packet. + */ +struct acl_flow_data { + uint32_t num_packets; + /* number of packets processed */ + uint32_t started; + /* number of trie traversals in progress */ + uint32_t trie; + /* current trie index (0 to N-1) */ + uint32_t cmplt_size; + uint32_t total_packets; + uint32_t categories; + /* number of result categories per packet. */ + /* maximum number of packets to process */ + const uint64_t *trans; + const uint8_t **data; + uint32_t *results; + struct completion *last_cmplt; + struct completion *cmplt_array; +}; + +/* + * Structure to maintain running results for + * a single packet (up to 4 tries). + */ +struct completion { + uint32_t *results; /* running results. */ + int32_t priority[RTE_ACL_MAX_CATEGORIES]; /* running priorities. */ + uint32_t count; /* num of remaining tries */ + /* true for allocated struct */ +} __attribute__((aligned(XMM_SIZE))); + +/* + * One parms structure for each slot in the search engine. + */ +struct parms { + const uint8_t *data; + /* input data for this packet */ + const uint32_t *data_index; + /* data indirection for this trie */ + struct completion *cmplt; + /* completion data for this packet */ +}; + +/* + * Define an global idle node for unused engine slots + */ +static const uint32_t idle[UINT8_MAX + 1]; + +/* + * Allocate a completion structure to manage the tries for a packet. + */ +static inline struct completion * +alloc_completion(struct completion *p, uint32_t size, uint32_t tries, + uint32_t *results) +{ + uint32_t n; + + for (n = 0; n < size; n++) { + + if (p[n].count == 0) { + + /* mark as allocated and set number of tries. */ + p[n].count = tries; + p[n].results = results; + return &(p[n]); + } + } + + /* should never get here */ + return NULL; +} + +/* + * Resolve priority for a single result trie. + */ +static inline void +resolve_single_priority(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p) +{ + if (parms[n].cmplt->count == ctx->num_tries || + parms[n].cmplt->priority[0] <= + p[transition].priority[0]) { + + parms[n].cmplt->priority[0] = p[transition].priority[0]; + parms[n].cmplt->results[0] = p[transition].results[0]; + } +} + +/* + * Routine to fill a slot in the parallel trie traversal array (parms) from + * the list of packets (flows). + */ +static inline uint64_t +acl_start_next_trie(struct acl_flow_data *flows, struct parms *parms, int n, + const struct rte_acl_ctx *ctx) +{ + uint64_t transition; + + /* if there are any more packets to process */ + if (flows->num_packets < flows->total_packets) { + parms[n].data = flows->data[flows->num_packets]; + parms[n].data_index = ctx->trie[flows->trie].data_index; + + /* if this is the first trie for this packet */ + if (flows->trie == 0) { + flows->last_cmplt = alloc_completion(flows->cmplt_array, + flows->cmplt_size, ctx->num_tries, + flows->results + + flows->num_packets * flows->categories); + } + + /* set completion parameters and starting index for this slot */ + parms[n].cmplt = flows->last_cmplt; + transition = + flows->trans[parms[n].data[*parms[n].data_index++] + + ctx->trie[flows->trie].root_index]; + + /* + * if this is the last trie for this packet, + * then setup next packet. + */ + flows->trie++; + if (flows->trie >= ctx->num_tries) { + flows->trie = 0; + flows->num_packets++; + } + + /* keep track of number of active trie traversals */ + flows->started++; + + /* no more tries to process, set slot to an idle position */ + } else { + transition = ctx->idle; + parms[n].data = (const uint8_t *)idle; + parms[n].data_index = idle; + } + return transition; +} + +static inline void +acl_set_flow(struct acl_flow_data *flows, struct completion *cmplt, + uint32_t cmplt_size, const uint8_t **data, uint32_t *results, + uint32_t data_num, uint32_t categories, const uint64_t *trans) +{ + flows->num_packets = 0; + flows->started = 0; + flows->trie = 0; + flows->last_cmplt = NULL; + flows->cmplt_array = cmplt; + flows->total_packets = data_num; + flows->categories = categories; + flows->cmplt_size = cmplt_size; + flows->data = data; + flows->results = results; + flows->trans = trans; +} + +typedef void (*resolve_priority_t) +(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, const struct rte_acl_match_results *p, + uint32_t categories); + +/* + * Detect matches. If a match node transition is found, then this trie + * traversal is complete and fill the slot with the next trie + * to be processed. + */ +static inline uint64_t +acl_match_check(uint64_t transition, int slot, + const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, resolve_priority_t resolve_priority) +{ + const struct rte_acl_match_results *p; + + p = (const struct rte_acl_match_results *) + (flows->trans + ctx->match_index); + + if (transition & RTE_ACL_NODE_MATCH) { + + /* Remove flags from index and decrement active traversals */ + transition &= RTE_ACL_NODE_INDEX; + flows->started--; + + /* Resolve priorities for this trie and running results */ + if (flows->categories == 1) + resolve_single_priority(transition, slot, ctx, + parms, p); + else + resolve_priority(transition, slot, ctx, parms, + p, flows->categories); + + /* Count down completed tries for this search request */ + parms[slot].cmplt->count--; + + /* Fill the slot with the next trie or idle trie */ + transition = acl_start_next_trie(flows, parms, slot, ctx); + } + + return transition; +} + +#endif /* _ACL_RUN_H_ */ diff --git a/lib/librte_acl/acl_run_avx2.c b/lib/librte_acl/acl_run_avx2.c new file mode 100644 index 00000000..79ebbd6c --- /dev/null +++ b/lib/librte_acl/acl_run_avx2.c @@ -0,0 +1,54 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + +#include "acl_run_avx2.h" + +/* + * Note, that to be able to use AVX2 classify method, + * both compiler and target cpu have to support AVX2 instructions. + */ +int +rte_acl_classify_avx2(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (likely(num >= MAX_SEARCHES_AVX16)) + return search_avx2x16(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_SSE8) + return search_sse_8(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_SSE4) + return search_sse_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); +} diff --git a/lib/librte_acl/acl_run_avx2.h b/lib/librte_acl/acl_run_avx2.h new file mode 100644 index 00000000..b01a46a5 --- /dev/null +++ b/lib/librte_acl/acl_run_avx2.h @@ -0,0 +1,284 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run_sse.h" + +static const rte_ymm_t ymm_match_mask = { + .u32 = { + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + }, +}; + +static const rte_ymm_t ymm_index_mask = { + .u32 = { + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + }, +}; + +static const rte_ymm_t ymm_shuffle_input = { + .u32 = { + 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c, + 0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c, + }, +}; + +static const rte_ymm_t ymm_ones_16 = { + .u16 = { + 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + }, +}; + +static const rte_ymm_t ymm_range_base = { + .u32 = { + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + }, +}; + +/* + * Process 8 transitions in parallel. + * tr_lo contains low 32 bits for 8 transition. + * tr_hi contains high 32 bits for 8 transition. + * next_input contains up to 4 input bytes for 8 flows. + */ +static inline __attribute__((always_inline)) ymm_t +transition8(ymm_t next_input, const uint64_t *trans, ymm_t *tr_lo, ymm_t *tr_hi) +{ + const int32_t *tr; + ymm_t addr; + + tr = (const int32_t *)(uintptr_t)trans; + + /* Calculate the address (array index) for all 8 transitions. */ + ACL_TR_CALC_ADDR(mm256, 256, addr, ymm_index_mask.y, next_input, + ymm_shuffle_input.y, ymm_ones_16.y, ymm_range_base.y, + *tr_lo, *tr_hi); + + /* load lower 32 bits of 8 transactions at once. */ + *tr_lo = _mm256_i32gather_epi32(tr, addr, sizeof(trans[0])); + + next_input = _mm256_srli_epi32(next_input, CHAR_BIT); + + /* load high 32 bits of 8 transactions at once. */ + *tr_hi = _mm256_i32gather_epi32(tr + 1, addr, sizeof(trans[0])); + + return next_input; +} + +/* + * Process matches for 8 flows. + * tr_lo contains low 32 bits for 8 transition. + * tr_hi contains high 32 bits for 8 transition. + */ +static inline void +acl_process_matches_avx2x8(const struct rte_acl_ctx *ctx, + struct parms *parms, struct acl_flow_data *flows, uint32_t slot, + ymm_t matches, ymm_t *tr_lo, ymm_t *tr_hi) +{ + ymm_t t0, t1; + ymm_t lo, hi; + xmm_t l0, l1; + uint32_t i; + uint64_t tr[MAX_SEARCHES_SSE8]; + + l1 = _mm256_extracti128_si256(*tr_lo, 1); + l0 = _mm256_castsi256_si128(*tr_lo); + + for (i = 0; i != RTE_DIM(tr) / 2; i++) { + + /* + * Extract low 32bits of each transition. + * That's enough to process the match. + */ + tr[i] = (uint32_t)_mm_cvtsi128_si32(l0); + tr[i + 4] = (uint32_t)_mm_cvtsi128_si32(l1); + + l0 = _mm_srli_si128(l0, sizeof(uint32_t)); + l1 = _mm_srli_si128(l1, sizeof(uint32_t)); + + tr[i] = acl_match_check(tr[i], slot + i, + ctx, parms, flows, resolve_priority_sse); + tr[i + 4] = acl_match_check(tr[i + 4], slot + i + 4, + ctx, parms, flows, resolve_priority_sse); + } + + /* Collect new transitions into 2 YMM registers. */ + t0 = _mm256_set_epi64x(tr[5], tr[4], tr[1], tr[0]); + t1 = _mm256_set_epi64x(tr[7], tr[6], tr[3], tr[2]); + + /* For each transition: put low 32 into tr_lo and high 32 into tr_hi */ + ACL_TR_HILO(mm256, __m256, t0, t1, lo, hi); + + /* Keep transitions wth NOMATCH intact. */ + *tr_lo = _mm256_blendv_epi8(*tr_lo, lo, matches); + *tr_hi = _mm256_blendv_epi8(*tr_hi, hi, matches); +} + +static inline void +acl_match_check_avx2x8(const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint32_t slot, + ymm_t *tr_lo, ymm_t *tr_hi, ymm_t match_mask) +{ + uint32_t msk; + ymm_t matches, temp; + + /* test for match node */ + temp = _mm256_and_si256(match_mask, *tr_lo); + matches = _mm256_cmpeq_epi32(temp, match_mask); + msk = _mm256_movemask_epi8(matches); + + while (msk != 0) { + + acl_process_matches_avx2x8(ctx, parms, flows, slot, + matches, tr_lo, tr_hi); + temp = _mm256_and_si256(match_mask, *tr_lo); + matches = _mm256_cmpeq_epi32(temp, match_mask); + msk = _mm256_movemask_epi8(matches); + } +} + +/* + * Execute trie traversal for up to 16 flows in parallel. + */ +static inline int +search_avx2x16(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + uint32_t n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_AVX16]; + struct completion cmplt[MAX_SEARCHES_AVX16]; + struct parms parms[MAX_SEARCHES_AVX16]; + ymm_t input[2], tr_lo[2], tr_hi[2]; + ymm_t t0, t1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < RTE_DIM(cmplt); n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + t0 = _mm256_set_epi64x(index_array[5], index_array[4], + index_array[1], index_array[0]); + t1 = _mm256_set_epi64x(index_array[7], index_array[6], + index_array[3], index_array[2]); + + ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[0], tr_hi[0]); + + t0 = _mm256_set_epi64x(index_array[13], index_array[12], + index_array[9], index_array[8]); + t1 = _mm256_set_epi64x(index_array[15], index_array[14], + index_array[11], index_array[10]); + + ACL_TR_HILO(mm256, __m256, t0, t1, tr_lo[1], tr_hi[1]); + + /* Check for any matches. */ + acl_match_check_avx2x8(ctx, parms, &flows, 0, &tr_lo[0], &tr_hi[0], + ymm_match_mask.y); + acl_match_check_avx2x8(ctx, parms, &flows, 8, &tr_lo[1], &tr_hi[1], + ymm_match_mask.y); + + while (flows.started > 0) { + + uint32_t in[MAX_SEARCHES_SSE8]; + + /* Gather 4 bytes of input data for first 8 flows. */ + in[0] = GET_NEXT_4BYTES(parms, 0); + in[4] = GET_NEXT_4BYTES(parms, 4); + in[1] = GET_NEXT_4BYTES(parms, 1); + in[5] = GET_NEXT_4BYTES(parms, 5); + in[2] = GET_NEXT_4BYTES(parms, 2); + in[6] = GET_NEXT_4BYTES(parms, 6); + in[3] = GET_NEXT_4BYTES(parms, 3); + in[7] = GET_NEXT_4BYTES(parms, 7); + input[0] = _mm256_set_epi32(in[7], in[6], in[5], in[4], + in[3], in[2], in[1], in[0]); + + /* Gather 4 bytes of input data for last 8 flows. */ + in[0] = GET_NEXT_4BYTES(parms, 8); + in[4] = GET_NEXT_4BYTES(parms, 12); + in[1] = GET_NEXT_4BYTES(parms, 9); + in[5] = GET_NEXT_4BYTES(parms, 13); + in[2] = GET_NEXT_4BYTES(parms, 10); + in[6] = GET_NEXT_4BYTES(parms, 14); + in[3] = GET_NEXT_4BYTES(parms, 11); + in[7] = GET_NEXT_4BYTES(parms, 15); + input[1] = _mm256_set_epi32(in[7], in[6], in[5], in[4], + in[3], in[2], in[1], in[0]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + input[0] = transition8(input[0], flows.trans, + &tr_lo[0], &tr_hi[0]); + input[1] = transition8(input[1], flows.trans, + &tr_lo[1], &tr_hi[1]); + + /* Check for any matches. */ + acl_match_check_avx2x8(ctx, parms, &flows, 0, + &tr_lo[0], &tr_hi[0], ymm_match_mask.y); + acl_match_check_avx2x8(ctx, parms, &flows, 8, + &tr_lo[1], &tr_hi[1], ymm_match_mask.y); + } + + return 0; +} diff --git a/lib/librte_acl/acl_run_neon.c b/lib/librte_acl/acl_run_neon.c new file mode 100644 index 00000000..b0144514 --- /dev/null +++ b/lib/librte_acl/acl_run_neon.c @@ -0,0 +1,46 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "acl_run_neon.h" + +int +rte_acl_classify_neon(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (likely(num >= 8)) + return search_neon_8(ctx, data, results, num, categories); + else if (num >= 4) + return search_neon_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); +} diff --git a/lib/librte_acl/acl_run_neon.h b/lib/librte_acl/acl_run_neon.h new file mode 100644 index 00000000..d233ff00 --- /dev/null +++ b/lib/librte_acl/acl_run_neon.h @@ -0,0 +1,289 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "acl_run.h" +#include "acl_vect.h" + +struct _neon_acl_const { + rte_xmm_t xmm_shuffle_input; + rte_xmm_t xmm_index_mask; + rte_xmm_t range_base; +} neon_acl_const __attribute__((aligned(RTE_CACHE_LINE_SIZE))) = { + { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c} + }, + { + .u32 = {RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, RTE_ACL_NODE_INDEX} + }, + { + .u32 = {0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c} + }, +}; + +/* + * Resolve priority for multiple results (neon version). + * This consists comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. + */ +static inline void +resolve_priority_neon(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, + const struct rte_acl_match_results *p, + uint32_t categories) +{ + uint32_t x; + int32x4_t results, priority, results1, priority1; + uint32x4_t selector; + int32_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + saved_results = (int32_t *)(&parms[n].cmplt->results[x]); + saved_priority = (int32_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = vld1q_s32( + (const int32_t *)&p[transition].results[x]); + priority = vld1q_s32( + (const int32_t *)&p[transition].priority[x]); + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + /* get running best results and their priorities */ + results1 = vld1q_s32(saved_results); + priority1 = vld1q_s32(saved_priority); + + /* select results that are highest priority */ + selector = vcgtq_s32(priority1, priority); + results = vbslq_s32(selector, results1, results); + priority = vbslq_s32(selector, priority1, priority); + } + + /* save running best results and their priorities */ + vst1q_s32(saved_results, results); + vst1q_s32(saved_priority, priority); + } +} + +/* + * Check for any match in 4 transitions + */ +static inline __attribute__((always_inline)) uint32_t +check_any_match_x4(uint64_t val[]) +{ + return (val[0] | val[1] | val[2] | val[3]) & RTE_ACL_NODE_MATCH; +} + +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, uint64_t transitions[]) +{ + while (check_any_match_x4(transitions)) { + transitions[0] = acl_match_check(transitions[0], slot, ctx, + parms, flows, resolve_priority_neon); + transitions[1] = acl_match_check(transitions[1], slot + 1, ctx, + parms, flows, resolve_priority_neon); + transitions[2] = acl_match_check(transitions[2], slot + 2, ctx, + parms, flows, resolve_priority_neon); + transitions[3] = acl_match_check(transitions[3], slot + 3, ctx, + parms, flows, resolve_priority_neon); + } +} + +/* + * Process 4 transitions (in 2 NEON Q registers) in parallel + */ +static inline __attribute__((always_inline)) int32x4_t +transition4(int32x4_t next_input, const uint64_t *trans, uint64_t transitions[]) +{ + int32x4x2_t tr_hi_lo; + int32x4_t t, in, r; + uint32x4_t index_msk, node_type, addr; + uint32x4_t dfa_msk, mask, quad_ofs, dfa_ofs; + + /* Move low 32 into tr_hi_lo.val[0] and high 32 into tr_hi_lo.val[1] */ + tr_hi_lo = vld2q_s32((const int32_t *)transitions); + + /* Calculate the address (array index) for all 4 transitions. */ + + index_msk = vld1q_u32((const uint32_t *)&neon_acl_const.xmm_index_mask); + + /* Calc node type and node addr */ + node_type = vbicq_s32(tr_hi_lo.val[0], index_msk); + addr = vandq_s32(tr_hi_lo.val[0], index_msk); + + /* t = 0 */ + t = veorq_s32(node_type, node_type); + + /* mask for DFA type(0) nodes */ + dfa_msk = vceqq_u32(node_type, t); + + mask = vld1q_s32((const int32_t *)&neon_acl_const.xmm_shuffle_input); + in = vqtbl1q_u8((uint8x16_t)next_input, (uint8x16_t)mask); + + /* DFA calculations. */ + r = vshrq_n_u32(in, 30); /* div by 64 */ + mask = vld1q_s32((const int32_t *)&neon_acl_const.range_base); + r = vaddq_u8(r, mask); + t = vshrq_n_u32(in, 24); + r = vqtbl1q_u8((uint8x16_t)tr_hi_lo.val[1], (uint8x16_t)r); + dfa_ofs = vsubq_s32(t, r); + + /* QUAD/SINGLE calculations. */ + t = vcgtq_s8(in, tr_hi_lo.val[1]); + t = vabsq_s8(t); + t = vpaddlq_u8(t); + quad_ofs = vpaddlq_u16(t); + + /* blend DFA and QUAD/SINGLE. */ + t = vbslq_u8(dfa_msk, dfa_ofs, quad_ofs); + + /* calculate address for next transitions */ + addr = vaddq_u32(addr, t); + + /* Fill next transitions */ + transitions[0] = trans[vgetq_lane_u32(addr, 0)]; + transitions[1] = trans[vgetq_lane_u32(addr, 1)]; + transitions[2] = trans[vgetq_lane_u32(addr, 2)]; + transitions[3] = trans[vgetq_lane_u32(addr, 3)]; + + return vshrq_n_u32(next_input, CHAR_BIT); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_neon_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[8]; + struct completion cmplt[8]; + struct parms parms[8]; + int32x4_t input0, input1; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < 8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]); + + while (flows.started > 0) { + /* Gather 4 bytes of input data for each stream. */ + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input0, 0); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 4), input1, 0); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input0, 1); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 5), input1, 1); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input0, 2); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 6), input1, 2); + + input0 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input0, 3); + input1 = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 7), input1, 3); + + /* Process the 4 bytes of input on each stream. */ + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + input0 = transition4(input0, flows.trans, &index_array[0]); + input1 = transition4(input1, flows.trans, &index_array[4]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, &index_array[0]); + acl_match_check_x4(4, ctx, parms, &flows, &index_array[4]); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_neon_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[4]; + struct completion cmplt[4]; + struct parms parms[4]; + int32x4_t input; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < 4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + + while (flows.started > 0) { + /* Gather 4 bytes of input data for each stream. */ + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 0), input, 0); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 1), input, 1); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 2), input, 2); + input = vsetq_lane_s32(GET_NEXT_4BYTES(parms, 3), input, 3); + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + input = transition4(input, flows.trans, index_array); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, index_array); + } + + return 0; +} diff --git a/lib/librte_acl/acl_run_scalar.c b/lib/librte_acl/acl_run_scalar.c new file mode 100644 index 00000000..5be216c6 --- /dev/null +++ b/lib/librte_acl/acl_run_scalar.c @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run.h" + +/* + * Resolve priority for multiple results (scalar version). + * This consists comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. + */ +static inline void +resolve_priority_scalar(uint64_t transition, int n, + const struct rte_acl_ctx *ctx, struct parms *parms, + const struct rte_acl_match_results *p, uint32_t categories) +{ + uint32_t i; + int32_t *saved_priority; + uint32_t *saved_results; + const int32_t *priority; + const uint32_t *results; + + saved_results = parms[n].cmplt->results; + saved_priority = parms[n].cmplt->priority; + + /* results and priorities for completed trie */ + results = p[transition].results; + priority = p[transition].priority; + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + for (i = 0; i < categories; i += RTE_ACL_RESULTS_MULTIPLIER) { + + if (saved_priority[i] <= priority[i]) { + saved_priority[i] = priority[i]; + saved_results[i] = results[i]; + } + if (saved_priority[i + 1] <= priority[i + 1]) { + saved_priority[i + 1] = priority[i + 1]; + saved_results[i + 1] = results[i + 1]; + } + if (saved_priority[i + 2] <= priority[i + 2]) { + saved_priority[i + 2] = priority[i + 2]; + saved_results[i + 2] = results[i + 2]; + } + if (saved_priority[i + 3] <= priority[i + 3]) { + saved_priority[i + 3] = priority[i + 3]; + saved_results[i + 3] = results[i + 3]; + } + } + } else { + for (i = 0; i < categories; i += RTE_ACL_RESULTS_MULTIPLIER) { + saved_priority[i] = priority[i]; + saved_priority[i + 1] = priority[i + 1]; + saved_priority[i + 2] = priority[i + 2]; + saved_priority[i + 3] = priority[i + 3]; + + saved_results[i] = results[i]; + saved_results[i + 1] = results[i + 1]; + saved_results[i + 2] = results[i + 2]; + saved_results[i + 3] = results[i + 3]; + } + } +} + +static inline uint32_t +scan_forward(uint32_t input, uint32_t max) +{ + return (input == 0) ? max : rte_bsf32(input); +} + +static inline uint64_t +scalar_transition(const uint64_t *trans_table, uint64_t transition, + uint8_t input) +{ + uint32_t addr, index, ranges, x, a, b, c; + + /* break transition into component parts */ + ranges = transition >> (sizeof(index) * CHAR_BIT); + index = transition & ~RTE_ACL_NODE_INDEX; + addr = transition ^ index; + + if (index != RTE_ACL_NODE_DFA) { + /* calc address for a QRANGE/SINGLE node */ + c = (uint32_t)input * SCALAR_QRANGE_MULT; + a = ranges | SCALAR_QRANGE_MIN; + a -= (c & SCALAR_QRANGE_MASK); + b = c & SCALAR_QRANGE_MIN; + a &= SCALAR_QRANGE_MIN; + a ^= (ranges ^ b) & (a ^ b); + x = scan_forward(a, 32) >> 3; + } else { + /* calc address for a DFA node */ + x = ranges >> (input / + RTE_ACL_DFA_GR64_SIZE * RTE_ACL_DFA_GR64_BIT); + x &= UINT8_MAX; + x = input - x; + } + + addr += x; + + /* pickup next transition */ + transition = *(trans_table + addr); + return transition; +} + +int +rte_acl_classify_scalar(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + int n; + uint64_t transition0, transition1; + uint32_t input0, input1; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_SCALAR]; + struct completion cmplt[MAX_SEARCHES_SCALAR]; + struct parms parms[MAX_SEARCHES_SCALAR]; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, num, + categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_SCALAR; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + transition0 = index_array[0]; + transition1 = index_array[1]; + + while ((transition0 | transition1) & RTE_ACL_NODE_MATCH) { + transition0 = acl_match_check(transition0, + 0, ctx, parms, &flows, resolve_priority_scalar); + transition1 = acl_match_check(transition1, + 1, ctx, parms, &flows, resolve_priority_scalar); + } + + while (flows.started > 0) { + + input0 = GET_NEXT_4BYTES(parms, 0); + input1 = GET_NEXT_4BYTES(parms, 1); + + for (n = 0; n < 4; n++) { + + transition0 = scalar_transition(flows.trans, + transition0, (uint8_t)input0); + input0 >>= CHAR_BIT; + + transition1 = scalar_transition(flows.trans, + transition1, (uint8_t)input1); + input1 >>= CHAR_BIT; + } + + while ((transition0 | transition1) & RTE_ACL_NODE_MATCH) { + transition0 = acl_match_check(transition0, + 0, ctx, parms, &flows, resolve_priority_scalar); + transition1 = acl_match_check(transition1, + 1, ctx, parms, &flows, resolve_priority_scalar); + } + } + return 0; +} diff --git a/lib/librte_acl/acl_run_sse.c b/lib/librte_acl/acl_run_sse.c new file mode 100644 index 00000000..a5a7d361 --- /dev/null +++ b/lib/librte_acl/acl_run_sse.c @@ -0,0 +1,47 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run_sse.h" + +int +rte_acl_classify_sse(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + if (likely(num >= MAX_SEARCHES_SSE8)) + return search_sse_8(ctx, data, results, num, categories); + else if (num >= MAX_SEARCHES_SSE4) + return search_sse_4(ctx, data, results, num, categories); + else + return rte_acl_classify_scalar(ctx, data, results, num, + categories); +} diff --git a/lib/librte_acl/acl_run_sse.h b/lib/librte_acl/acl_run_sse.h new file mode 100644 index 00000000..ad40a674 --- /dev/null +++ b/lib/librte_acl/acl_run_sse.h @@ -0,0 +1,357 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "acl_run.h" +#include "acl_vect.h" + +enum { + SHUFFLE32_SLOT1 = 0xe5, + SHUFFLE32_SLOT2 = 0xe6, + SHUFFLE32_SLOT3 = 0xe7, + SHUFFLE32_SWAP64 = 0x4e, +}; + +static const rte_xmm_t xmm_shuffle_input = { + .u32 = {0x00000000, 0x04040404, 0x08080808, 0x0c0c0c0c}, +}; + +static const rte_xmm_t xmm_ones_16 = { + .u16 = {1, 1, 1, 1, 1, 1, 1, 1}, +}; + +static const rte_xmm_t xmm_match_mask = { + .u32 = { + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + RTE_ACL_NODE_MATCH, + }, +}; + +static const rte_xmm_t xmm_index_mask = { + .u32 = { + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + RTE_ACL_NODE_INDEX, + }, +}; + +static const rte_xmm_t xmm_range_base = { + .u32 = { + 0xffffff00, 0xffffff04, 0xffffff08, 0xffffff0c, + }, +}; + +/* + * Resolve priority for multiple results (sse version). + * This consists comparing the priority of the current traversal with the + * running set of results for the packet. + * For each result, keep a running array of the result (rule number) and + * its priority for each category. + */ +static inline void +resolve_priority_sse(uint64_t transition, int n, const struct rte_acl_ctx *ctx, + struct parms *parms, const struct rte_acl_match_results *p, + uint32_t categories) +{ + uint32_t x; + xmm_t results, priority, results1, priority1, selector; + xmm_t *saved_results, *saved_priority; + + for (x = 0; x < categories; x += RTE_ACL_RESULTS_MULTIPLIER) { + + saved_results = (xmm_t *)(&parms[n].cmplt->results[x]); + saved_priority = + (xmm_t *)(&parms[n].cmplt->priority[x]); + + /* get results and priorities for completed trie */ + results = _mm_loadu_si128( + (const xmm_t *)&p[transition].results[x]); + priority = _mm_loadu_si128( + (const xmm_t *)&p[transition].priority[x]); + + /* if this is not the first completed trie */ + if (parms[n].cmplt->count != ctx->num_tries) { + + /* get running best results and their priorities */ + results1 = _mm_loadu_si128(saved_results); + priority1 = _mm_loadu_si128(saved_priority); + + /* select results that are highest priority */ + selector = _mm_cmpgt_epi32(priority1, priority); + results = _mm_blendv_epi8(results, results1, selector); + priority = _mm_blendv_epi8(priority, priority1, + selector); + } + + /* save running best results and their priorities */ + _mm_storeu_si128(saved_results, results); + _mm_storeu_si128(saved_priority, priority); + } +} + +/* + * Extract transitions from an XMM register and check for any matches + */ +static void +acl_process_matches(xmm_t *indices, int slot, const struct rte_acl_ctx *ctx, + struct parms *parms, struct acl_flow_data *flows) +{ + uint64_t transition1, transition2; + + /* extract transition from low 64 bits. */ + transition1 = _mm_cvtsi128_si64(*indices); + + /* extract transition from high 64 bits. */ + *indices = _mm_shuffle_epi32(*indices, SHUFFLE32_SWAP64); + transition2 = _mm_cvtsi128_si64(*indices); + + transition1 = acl_match_check(transition1, slot, ctx, + parms, flows, resolve_priority_sse); + transition2 = acl_match_check(transition2, slot + 1, ctx, + parms, flows, resolve_priority_sse); + + /* update indices with new transitions. */ + *indices = _mm_set_epi64x(transition2, transition1); +} + +/* + * Check for any match in 4 transitions (contained in 2 SSE registers) + */ +static inline __attribute__((always_inline)) void +acl_match_check_x4(int slot, const struct rte_acl_ctx *ctx, struct parms *parms, + struct acl_flow_data *flows, xmm_t *indices1, xmm_t *indices2, + xmm_t match_mask) +{ + xmm_t temp; + + /* put low 32 bits of each transition into one register */ + temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, (__m128)*indices2, + 0x88); + /* test for match node */ + temp = _mm_and_si128(match_mask, temp); + + while (!_mm_testz_si128(temp, temp)) { + acl_process_matches(indices1, slot, ctx, parms, flows); + acl_process_matches(indices2, slot + 2, ctx, parms, flows); + + temp = (xmm_t)_mm_shuffle_ps((__m128)*indices1, + (__m128)*indices2, + 0x88); + temp = _mm_and_si128(match_mask, temp); + } +} + +/* + * Process 4 transitions (in 2 XMM registers) in parallel + */ +static inline __attribute__((always_inline)) xmm_t +transition4(xmm_t next_input, const uint64_t *trans, + xmm_t *indices1, xmm_t *indices2) +{ + xmm_t addr, tr_lo, tr_hi; + uint64_t trans0, trans2; + + /* Shuffle low 32 into tr_lo and high 32 into tr_hi */ + ACL_TR_HILO(mm, __m128, *indices1, *indices2, tr_lo, tr_hi); + + /* Calculate the address (array index) for all 4 transitions. */ + ACL_TR_CALC_ADDR(mm, 128, addr, xmm_index_mask.x, next_input, + xmm_shuffle_input.x, xmm_ones_16.x, xmm_range_base.x, + tr_lo, tr_hi); + + /* Gather 64 bit transitions and pack back into 2 registers. */ + + trans0 = trans[_mm_cvtsi128_si32(addr)]; + + /* get slot 2 */ + + /* {x0, x1, x2, x3} -> {x2, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT2); + trans2 = trans[_mm_cvtsi128_si32(addr)]; + + /* get slot 1 */ + + /* {x2, x1, x2, x3} -> {x1, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT1); + *indices1 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans0); + + /* get slot 3 */ + + /* {x1, x1, x2, x3} -> {x3, x1, x2, x3} */ + addr = _mm_shuffle_epi32(addr, SHUFFLE32_SLOT3); + *indices2 = _mm_set_epi64x(trans[_mm_cvtsi128_si32(addr)], trans2); + + return _mm_srli_epi32(next_input, CHAR_BIT); +} + +/* + * Execute trie traversal with 8 traversals in parallel + */ +static inline int +search_sse_8(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_SSE8]; + struct completion cmplt[MAX_SEARCHES_SSE8]; + struct parms parms[MAX_SEARCHES_SSE8]; + xmm_t input0, input1; + xmm_t indices1, indices2, indices3, indices4; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_SSE8; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + /* + * indices1 contains index_array[0,1] + * indices2 contains index_array[2,3] + * indices3 contains index_array[4,5] + * indices4 contains index_array[6,7] + */ + + indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]); + indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]); + + indices3 = _mm_loadu_si128((xmm_t *) &index_array[4]); + indices4 = _mm_loadu_si128((xmm_t *) &index_array[6]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + acl_match_check_x4(4, ctx, parms, &flows, + &indices3, &indices4, xmm_match_mask.x); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input0 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0)); + input1 = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 4)); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 1), 1); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 5), 1); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 2), 2); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 6), 2); + + input0 = _mm_insert_epi32(input0, GET_NEXT_4BYTES(parms, 3), 3); + input1 = _mm_insert_epi32(input1, GET_NEXT_4BYTES(parms, 7), 3); + + /* Process the 4 bytes of input on each stream. */ + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + input0 = transition4(input0, flows.trans, + &indices1, &indices2); + input1 = transition4(input1, flows.trans, + &indices3, &indices4); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + acl_match_check_x4(4, ctx, parms, &flows, + &indices3, &indices4, xmm_match_mask.x); + } + + return 0; +} + +/* + * Execute trie traversal with 4 traversals in parallel + */ +static inline int +search_sse_4(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, int total_packets, uint32_t categories) +{ + int n; + struct acl_flow_data flows; + uint64_t index_array[MAX_SEARCHES_SSE4]; + struct completion cmplt[MAX_SEARCHES_SSE4]; + struct parms parms[MAX_SEARCHES_SSE4]; + xmm_t input, indices1, indices2; + + acl_set_flow(&flows, cmplt, RTE_DIM(cmplt), data, results, + total_packets, categories, ctx->trans_table); + + for (n = 0; n < MAX_SEARCHES_SSE4; n++) { + cmplt[n].count = 0; + index_array[n] = acl_start_next_trie(&flows, parms, n, ctx); + } + + indices1 = _mm_loadu_si128((xmm_t *) &index_array[0]); + indices2 = _mm_loadu_si128((xmm_t *) &index_array[2]); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + + while (flows.started > 0) { + + /* Gather 4 bytes of input data for each stream. */ + input = _mm_cvtsi32_si128(GET_NEXT_4BYTES(parms, 0)); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 1), 1); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 2), 2); + input = _mm_insert_epi32(input, GET_NEXT_4BYTES(parms, 3), 3); + + /* Process the 4 bytes of input on each stream. */ + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + input = transition4(input, flows.trans, &indices1, &indices2); + + /* Check for any matches. */ + acl_match_check_x4(0, ctx, parms, &flows, + &indices1, &indices2, xmm_match_mask.x); + } + + return 0; +} diff --git a/lib/librte_acl/acl_vect.h b/lib/librte_acl/acl_vect.h new file mode 100644 index 00000000..6cc19997 --- /dev/null +++ b/lib/librte_acl/acl_vect.h @@ -0,0 +1,116 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ACL_VECT_H_ +#define _RTE_ACL_VECT_H_ + +/** + * @file + * + * RTE ACL SSE/AVX related header. + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +/* + * Takes 2 SIMD registers containing N transitions eachi (tr0, tr1). + * Shuffles it into different representation: + * lo - contains low 32 bits of given N transitions. + * hi - contains high 32 bits of given N transitions. + */ +#define ACL_TR_HILO(P, TC, tr0, tr1, lo, hi) do { \ + lo = (typeof(lo))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0x88); \ + hi = (typeof(hi))_##P##_shuffle_ps((TC)(tr0), (TC)(tr1), 0xdd); \ +} while (0) + + +/* + * Calculate the address of the next transition for + * all types of nodes. Note that only DFA nodes and range + * nodes actually transition to another node. Match + * nodes not supposed to be encountered here. + * For quad range nodes: + * Calculate number of range boundaries that are less than the + * input value. Range boundaries for each node are in signed 8 bit, + * ordered from -128 to 127. + * This is effectively a popcnt of bytes that are greater than the + * input byte. + * Single nodes are processed in the same ways as quad range nodes. +*/ +#define ACL_TR_CALC_ADDR(P, S, \ + addr, index_mask, next_input, shuffle_input, \ + ones_16, range_base, tr_lo, tr_hi) do { \ + \ + typeof(addr) in, node_type, r, t; \ + typeof(addr) dfa_msk, dfa_ofs, quad_ofs; \ + \ + t = _##P##_xor_si##S(index_mask, index_mask); \ + in = _##P##_shuffle_epi8(next_input, shuffle_input); \ + \ + /* Calc node type and node addr */ \ + node_type = _##P##_andnot_si##S(index_mask, tr_lo); \ + addr = _##P##_and_si##S(index_mask, tr_lo); \ + \ + /* mask for DFA type(0) nodes */ \ + dfa_msk = _##P##_cmpeq_epi32(node_type, t); \ + \ + /* DFA calculations. */ \ + r = _##P##_srli_epi32(in, 30); \ + r = _##P##_add_epi8(r, range_base); \ + t = _##P##_srli_epi32(in, 24); \ + r = _##P##_shuffle_epi8(tr_hi, r); \ + \ + dfa_ofs = _##P##_sub_epi32(t, r); \ + \ + /* QUAD/SINGLE caluclations. */ \ + t = _##P##_cmpgt_epi8(in, tr_hi); \ + t = _##P##_sign_epi8(t, t); \ + t = _##P##_maddubs_epi16(t, t); \ + quad_ofs = _##P##_madd_epi16(t, ones_16); \ + \ + /* blend DFA and QUAD/SINGLE. */ \ + t = _##P##_blendv_epi8(quad_ofs, dfa_ofs, dfa_msk); \ + \ + /* calculate address for next transitions. */ \ + addr = _##P##_add_epi32(addr, t); \ +} while (0) + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ACL_VECT_H_ */ diff --git a/lib/librte_acl/rte_acl.c b/lib/librte_acl/rte_acl.c new file mode 100644 index 00000000..4ba9786b --- /dev/null +++ b/lib/librte_acl/rte_acl.c @@ -0,0 +1,393 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "acl.h" + +TAILQ_HEAD(rte_acl_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_acl_tailq = { + .name = "RTE_ACL", +}; +EAL_REGISTER_TAILQ(rte_acl_tailq) + +/* + * If the compiler doesn't support AVX2 instructions, + * then the dummy one would be used instead for AVX2 classify method. + */ +int __attribute__ ((weak)) +rte_acl_classify_avx2(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int __attribute__ ((weak)) +rte_acl_classify_sse(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +int __attribute__ ((weak)) +rte_acl_classify_neon(__rte_unused const struct rte_acl_ctx *ctx, + __rte_unused const uint8_t **data, + __rte_unused uint32_t *results, + __rte_unused uint32_t num, + __rte_unused uint32_t categories) +{ + return -ENOTSUP; +} + +static const rte_acl_classify_t classify_fns[] = { + [RTE_ACL_CLASSIFY_DEFAULT] = rte_acl_classify_scalar, + [RTE_ACL_CLASSIFY_SCALAR] = rte_acl_classify_scalar, + [RTE_ACL_CLASSIFY_SSE] = rte_acl_classify_sse, + [RTE_ACL_CLASSIFY_AVX2] = rte_acl_classify_avx2, + [RTE_ACL_CLASSIFY_NEON] = rte_acl_classify_neon, +}; + +/* by default, use always available scalar code path. */ +static enum rte_acl_classify_alg rte_acl_default_classify = + RTE_ACL_CLASSIFY_SCALAR; + +static void +rte_acl_set_default_classify(enum rte_acl_classify_alg alg) +{ + rte_acl_default_classify = alg; +} + +extern int +rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, enum rte_acl_classify_alg alg) +{ + if (ctx == NULL || (uint32_t)alg >= RTE_DIM(classify_fns)) + return -EINVAL; + + ctx->alg = alg; + return 0; +} + +/* + * Select highest available classify method as default one. + * Note that CLASSIFY_AVX2 should be set as a default only + * if both conditions are met: + * at build time compiler supports AVX2 and target cpu supports AVX2. + */ +static void __attribute__((constructor)) +rte_acl_init(void) +{ + enum rte_acl_classify_alg alg = RTE_ACL_CLASSIFY_DEFAULT; + +#if defined(RTE_ARCH_ARM64) + alg = RTE_ACL_CLASSIFY_NEON; +#elif defined(RTE_ARCH_ARM) + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_NEON)) + alg = RTE_ACL_CLASSIFY_NEON; +#else +#ifdef CC_AVX2_SUPPORT + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) + alg = RTE_ACL_CLASSIFY_AVX2; + else if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) +#else + if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_1)) +#endif + alg = RTE_ACL_CLASSIFY_SSE; + +#endif + rte_acl_set_default_classify(alg); +} + +int +rte_acl_classify_alg(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories, + enum rte_acl_classify_alg alg) +{ + if (categories != 1 && + ((RTE_ACL_RESULTS_MULTIPLIER - 1) & categories) != 0) + return -EINVAL; + + return classify_fns[alg](ctx, data, results, num, categories); +} + +int +rte_acl_classify(const struct rte_acl_ctx *ctx, const uint8_t **data, + uint32_t *results, uint32_t num, uint32_t categories) +{ + return rte_acl_classify_alg(ctx, data, results, num, categories, + ctx->alg); +} + +struct rte_acl_ctx * +rte_acl_find_existing(const char *name) +{ + struct rte_acl_ctx *ctx = NULL; + struct rte_acl_list *acl_list; + struct rte_tailq_entry *te; + + acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, acl_list, next) { + ctx = (struct rte_acl_ctx *) te->data; + if (strncmp(name, ctx->name, sizeof(ctx->name)) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return ctx; +} + +void +rte_acl_free(struct rte_acl_ctx *ctx) +{ + struct rte_acl_list *acl_list; + struct rte_tailq_entry *te; + + if (ctx == NULL) + return; + + acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, acl_list, next) { + if (te->data == (void *) ctx) + break; + } + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(acl_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(ctx->mem); + rte_free(ctx); + rte_free(te); +} + +struct rte_acl_ctx * +rte_acl_create(const struct rte_acl_param *param) +{ + size_t sz; + struct rte_acl_ctx *ctx; + struct rte_acl_list *acl_list; + struct rte_tailq_entry *te; + char name[sizeof(ctx->name)]; + + acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list); + + /* check that input parameters are valid. */ + if (param == NULL || param->name == NULL) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(name, sizeof(name), "ACL_%s", param->name); + + /* calculate amount of memory required for pattern set. */ + sz = sizeof(*ctx) + param->max_rule_num * param->rule_size; + + /* get EAL TAILQ lock. */ + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* if we already have one with that name */ + TAILQ_FOREACH(te, acl_list, next) { + ctx = (struct rte_acl_ctx *) te->data; + if (strncmp(param->name, ctx->name, sizeof(ctx->name)) == 0) + break; + } + + /* if ACL with such name doesn't exist, then create a new one. */ + if (te == NULL) { + ctx = NULL; + te = rte_zmalloc("ACL_TAILQ_ENTRY", sizeof(*te), 0); + + if (te == NULL) { + RTE_LOG(ERR, ACL, "Cannot allocate tailq entry!\n"); + goto exit; + } + + ctx = rte_zmalloc_socket(name, sz, RTE_CACHE_LINE_SIZE, param->socket_id); + + if (ctx == NULL) { + RTE_LOG(ERR, ACL, + "allocation of %zu bytes on socket %d for %s failed\n", + sz, param->socket_id, name); + rte_free(te); + goto exit; + } + /* init new allocated context. */ + ctx->rules = ctx + 1; + ctx->max_rules = param->max_rule_num; + ctx->rule_sz = param->rule_size; + ctx->socket_id = param->socket_id; + ctx->alg = rte_acl_default_classify; + snprintf(ctx->name, sizeof(ctx->name), "%s", param->name); + + te->data = (void *) ctx; + + TAILQ_INSERT_TAIL(acl_list, te, next); + } + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return ctx; +} + +static int +acl_add_rules(struct rte_acl_ctx *ctx, const void *rules, uint32_t num) +{ + uint8_t *pos; + + if (num + ctx->num_rules > ctx->max_rules) + return -ENOMEM; + + pos = ctx->rules; + pos += ctx->rule_sz * ctx->num_rules; + memcpy(pos, rules, num * ctx->rule_sz); + ctx->num_rules += num; + + return 0; +} + +static int +acl_check_rule(const struct rte_acl_rule_data *rd) +{ + if ((RTE_LEN2MASK(RTE_ACL_MAX_CATEGORIES, typeof(rd->category_mask)) & + rd->category_mask) == 0 || + rd->priority > RTE_ACL_MAX_PRIORITY || + rd->priority < RTE_ACL_MIN_PRIORITY || + rd->userdata == RTE_ACL_INVALID_USERDATA) + return -EINVAL; + return 0; +} + +int +rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules, + uint32_t num) +{ + const struct rte_acl_rule *rv; + uint32_t i; + int32_t rc; + + if (ctx == NULL || rules == NULL || 0 == ctx->rule_sz) + return -EINVAL; + + for (i = 0; i != num; i++) { + rv = (const struct rte_acl_rule *) + ((uintptr_t)rules + i * ctx->rule_sz); + rc = acl_check_rule(&rv->data); + if (rc != 0) { + RTE_LOG(ERR, ACL, "%s(%s): rule #%u is invalid\n", + __func__, ctx->name, i + 1); + return rc; + } + } + + return acl_add_rules(ctx, rules, num); +} + +/* + * Reset all rules. + * Note that RT structures are not affected. + */ +void +rte_acl_reset_rules(struct rte_acl_ctx *ctx) +{ + if (ctx != NULL) + ctx->num_rules = 0; +} + +/* + * Reset all rules and destroys RT structures. + */ +void +rte_acl_reset(struct rte_acl_ctx *ctx) +{ + if (ctx != NULL) { + rte_acl_reset_rules(ctx); + rte_acl_build(ctx, &ctx->config); + } +} + +/* + * Dump ACL context to the stdout. + */ +void +rte_acl_dump(const struct rte_acl_ctx *ctx) +{ + if (!ctx) + return; + printf("acl context <%s>@%p\n", ctx->name, ctx); + printf(" socket_id=%"PRId32"\n", ctx->socket_id); + printf(" alg=%"PRId32"\n", ctx->alg); + printf(" max_rules=%"PRIu32"\n", ctx->max_rules); + printf(" rule_size=%"PRIu32"\n", ctx->rule_sz); + printf(" num_rules=%"PRIu32"\n", ctx->num_rules); + printf(" num_categories=%"PRIu32"\n", ctx->num_categories); + printf(" num_tries=%"PRIu32"\n", ctx->num_tries); +} + +/* + * Dump all ACL contexts to the stdout. + */ +void +rte_acl_list_dump(void) +{ + struct rte_acl_ctx *ctx; + struct rte_acl_list *acl_list; + struct rte_tailq_entry *te; + + acl_list = RTE_TAILQ_CAST(rte_acl_tailq.head, rte_acl_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, acl_list, next) { + ctx = (struct rte_acl_ctx *) te->data; + rte_acl_dump(ctx); + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); +} diff --git a/lib/librte_acl/rte_acl.h b/lib/librte_acl/rte_acl.h new file mode 100644 index 00000000..0979a098 --- /dev/null +++ b/lib/librte_acl/rte_acl.h @@ -0,0 +1,388 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ACL_H_ +#define _RTE_ACL_H_ + +/** + * @file + * + * RTE Classifier. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_ACL_MAX_CATEGORIES 16 + +#define RTE_ACL_RESULTS_MULTIPLIER (XMM_SIZE / sizeof(uint32_t)) + +#define RTE_ACL_MAX_LEVELS 64 +#define RTE_ACL_MAX_FIELDS 64 + +union rte_acl_field_types { + uint8_t u8; + uint16_t u16; + uint32_t u32; + uint64_t u64; +}; + +enum { + RTE_ACL_FIELD_TYPE_MASK = 0, + RTE_ACL_FIELD_TYPE_RANGE, + RTE_ACL_FIELD_TYPE_BITMASK +}; + +/** + * ACL Field definition. + * Each field in the ACL rule has an associate definition. + * It defines the type of field, its size, its offset in the input buffer, + * the field index, and the input index. + * For performance reasons, the inner loop of the search function is unrolled + * to process four input bytes at a time. This requires the input to be grouped + * into sets of 4 consecutive bytes. The loop processes the first input byte as + * part of the setup and then subsequent bytes must be in groups of 4 + * consecutive bytes. + */ +struct rte_acl_field_def { + uint8_t type; /**< type - RTE_ACL_FIELD_TYPE_*. */ + uint8_t size; /**< size of field 1,2,4, or 8. */ + uint8_t field_index; /**< index of field inside the rule. */ + uint8_t input_index; /**< 0-N input index. */ + uint32_t offset; /**< offset to start of field. */ +}; + +/** + * ACL build configuration. + * Defines the fields of an ACL trie and number of categories to build with. + */ +struct rte_acl_config { + uint32_t num_categories; /**< Number of categories to build with. */ + uint32_t num_fields; /**< Number of field definitions. */ + struct rte_acl_field_def defs[RTE_ACL_MAX_FIELDS]; + /**< array of field definitions. */ + size_t max_size; + /**< max memory limit for internal run-time structures. */ +}; + +/** + * Defines the value of a field for a rule. + */ +struct rte_acl_field { + union rte_acl_field_types value; + /**< a 1,2,4, or 8 byte value of the field. */ + union rte_acl_field_types mask_range; + /**< + * depending on field type: + * mask -> 1.2.3.4/32 value=0x1020304, mask_range=32, + * range -> 0 : 65535 value=0, mask_range=65535, + * bitmask -> 0x06/0xff value=6, mask_range=0xff. + */ +}; + +enum { + RTE_ACL_TYPE_SHIFT = 29, + RTE_ACL_MAX_INDEX = RTE_LEN2MASK(RTE_ACL_TYPE_SHIFT, uint32_t), + RTE_ACL_MAX_PRIORITY = RTE_ACL_MAX_INDEX, + RTE_ACL_MIN_PRIORITY = 0, +}; + +#define RTE_ACL_INVALID_USERDATA 0 + +#define RTE_ACL_MASKLEN_TO_BITMASK(v, s) \ +((v) == 0 ? (v) : (typeof(v))((uint64_t)-1 << ((s) * CHAR_BIT - (v)))) + +/** + * Miscellaneous data for ACL rule. + */ +struct rte_acl_rule_data { + uint32_t category_mask; /**< Mask of categories for that rule. */ + int32_t priority; /**< Priority for that rule. */ + uint32_t userdata; /**< Associated with the rule user data. */ +}; + +/** + * Defines single ACL rule. + * data - miscellaneous data for the rule. + * field[] - value and mask or range for each field. + */ +#define RTE_ACL_RULE_DEF(name, fld_num) struct name {\ + struct rte_acl_rule_data data; \ + struct rte_acl_field field[fld_num]; \ +} + +RTE_ACL_RULE_DEF(rte_acl_rule, 0); + +#define RTE_ACL_RULE_SZ(fld_num) \ + (sizeof(struct rte_acl_rule) + sizeof(struct rte_acl_field) * (fld_num)) + + +/** Max number of characters in name.*/ +#define RTE_ACL_NAMESIZE 32 + +/** + * Parameters used when creating the ACL context. + */ +struct rte_acl_param { + const char *name; /**< Name of the ACL context. */ + int socket_id; /**< Socket ID to allocate memory for. */ + uint32_t rule_size; /**< Size of each rule. */ + uint32_t max_rule_num; /**< Maximum number of rules. */ +}; + + +/** + * Create a new ACL context. + * + * @param param + * Parameters used to create and initialise the ACL context. + * @return + * Pointer to ACL context structure that is used in future ACL + * operations, or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - EINVAL - invalid parameter passed to function + */ +struct rte_acl_ctx * +rte_acl_create(const struct rte_acl_param *param); + +/** + * Find an existing ACL context object and return a pointer to it. + * + * @param name + * Name of the ACL context as passed to rte_acl_create() + * @return + * Pointer to ACL context or NULL if object not found + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_acl_ctx * +rte_acl_find_existing(const char *name); + +/** + * De-allocate all memory used by ACL context. + * + * @param ctx + * ACL context to free + */ +void +rte_acl_free(struct rte_acl_ctx *ctx); + +/** + * Add rules to an existing ACL context. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to add patterns to. + * @param rules + * Array of rules to add to the ACL context. + * Note that all fields in rte_acl_rule structures are expected + * to be in host byte order. + * Each rule expected to be in the same format and not exceed size + * specified at ACL context creation time. + * @param num + * Number of elements in the input array of rules. + * @return + * - -ENOMEM if there is no space in the ACL context for these rules. + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully. + */ +int +rte_acl_add_rules(struct rte_acl_ctx *ctx, const struct rte_acl_rule *rules, + uint32_t num); + +/** + * Delete all rules from the ACL context. + * This function is not multi-thread safe. + * Note that internal run-time structures are not affected. + * + * @param ctx + * ACL context to delete rules from. + */ +void +rte_acl_reset_rules(struct rte_acl_ctx *ctx); + +/** + * Analyze set of rules and build required internal run-time structures. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to build. + * @param cfg + * Pointer to struct rte_acl_config - defines build parameters. + * @return + * - -ENOMEM if couldn't allocate enough memory. + * - -EINVAL if the parameters are invalid. + * - Negative error code if operation failed. + * - Zero if operation completed successfully. + */ +int +rte_acl_build(struct rte_acl_ctx *ctx, const struct rte_acl_config *cfg); + +/** + * Delete all rules from the ACL context and + * destroy all internal run-time structures. + * This function is not multi-thread safe. + * + * @param ctx + * ACL context to reset. + */ +void +rte_acl_reset(struct rte_acl_ctx *ctx); + +/** + * Available implementations of ACL classify. + */ +enum rte_acl_classify_alg { + RTE_ACL_CLASSIFY_DEFAULT = 0, + RTE_ACL_CLASSIFY_SCALAR = 1, /**< generic implementation. */ + RTE_ACL_CLASSIFY_SSE = 2, /**< requires SSE4.1 support. */ + RTE_ACL_CLASSIFY_AVX2 = 3, /**< requires AVX2 support. */ + RTE_ACL_CLASSIFY_NEON = 4, /**< requires NEON support. */ + RTE_ACL_CLASSIFY_NUM /* should always be the last one. */ +}; + +/** + * Perform search for a matching ACL rule for each input data buffer. + * Each input data buffer can have up to *categories* matches. + * That implies that results array should be big enough to hold + * (categories * num) elements. + * Also categories parameter should be either one or multiple of + * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES. + * If more than one rule is applicable for given input buffer and + * given category, then rule with highest priority will be returned as a match. + * Note, that it is a caller's responsibility to ensure that input parameters + * are valid and point to correct memory locations. + * + * @param ctx + * ACL context to search with. + * @param data + * Array of pointers to input data buffers to perform search. + * Note that all fields in input data buffers supposed to be in network + * byte order (MSB). + * @param results + * Array of search results, *categories* results per each input data buffer. + * @param num + * Number of elements in the input data buffers array. + * @param categories + * Number of maximum possible matches for each input buffer, one possible + * match per category. + * @return + * zero on successful completion. + * -EINVAL for incorrect arguments. + */ +extern int +rte_acl_classify(const struct rte_acl_ctx *ctx, + const uint8_t **data, + uint32_t *results, uint32_t num, + uint32_t categories); + +/** + * Perform search using specified algorithm for a matching ACL rule for + * each input data buffer. + * Each input data buffer can have up to *categories* matches. + * That implies that results array should be big enough to hold + * (categories * num) elements. + * Also categories parameter should be either one or multiple of + * RTE_ACL_RESULTS_MULTIPLIER and can't be bigger than RTE_ACL_MAX_CATEGORIES. + * If more than one rule is applicable for given input buffer and + * given category, then rule with highest priority will be returned as a match. + * Note, that it is a caller's responsibility to ensure that input parameters + * are valid and point to correct memory locations. + * + * @param ctx + * ACL context to search with. + * @param data + * Array of pointers to input data buffers to perform search. + * Note that all fields in input data buffers supposed to be in network + * byte order (MSB). + * @param results + * Array of search results, *categories* results per each input data buffer. + * @param num + * Number of elements in the input data buffers array. + * @param categories + * Number of maximum possible matches for each input buffer, one possible + * match per category. + * @param alg + * Algorithm to be used for the search. + * It is the caller responsibility to ensure that the value refers to the + * existing algorithm, and that it could be run on the given CPU. + * @return + * zero on successful completion. + * -EINVAL for incorrect arguments. + */ +extern int +rte_acl_classify_alg(const struct rte_acl_ctx *ctx, + const uint8_t **data, + uint32_t *results, uint32_t num, + uint32_t categories, + enum rte_acl_classify_alg alg); + +/* + * Override the default classifier function for a given ACL context. + * @param ctx + * ACL context to change classify function for. + * @param alg + * New default classify algorithm for given ACL context. + * It is the caller responsibility to ensure that the value refers to the + * existing algorithm, and that it could be run on the given CPU. + * @return + * - -EINVAL if the parameters are invalid. + * - Zero if operation completed successfully. + */ +extern int +rte_acl_set_ctx_classify(struct rte_acl_ctx *ctx, + enum rte_acl_classify_alg alg); + +/** + * Dump an ACL context structure to the console. + * + * @param ctx + * ACL context to dump. + */ +void +rte_acl_dump(const struct rte_acl_ctx *ctx); + +/** + * Dump all ACL context structures to the console. + */ +void +rte_acl_list_dump(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ACL_H_ */ diff --git a/lib/librte_acl/rte_acl_osdep.h b/lib/librte_acl/rte_acl_osdep.h new file mode 100644 index 00000000..41f7e3d4 --- /dev/null +++ b/lib/librte_acl/rte_acl_osdep.h @@ -0,0 +1,80 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ACL_OSDEP_H_ +#define _RTE_ACL_OSDEP_H_ + +/** + * @file + * + * RTE ACL DPDK/OS dependent file. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Common defines. + */ + +#define DIM(x) RTE_DIM(x) + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#endif /* _RTE_ACL_OSDEP_H_ */ diff --git a/lib/librte_acl/rte_acl_version.map b/lib/librte_acl/rte_acl_version.map new file mode 100644 index 00000000..b09370a1 --- /dev/null +++ b/lib/librte_acl/rte_acl_version.map @@ -0,0 +1,19 @@ +DPDK_2.0 { + global: + + rte_acl_add_rules; + rte_acl_build; + rte_acl_classify; + rte_acl_classify_alg; + rte_acl_classify_scalar; + rte_acl_create; + rte_acl_dump; + rte_acl_find_existing; + rte_acl_free; + rte_acl_list_dump; + rte_acl_reset; + rte_acl_reset_rules; + rte_acl_set_ctx_classify; + + local: *; +}; diff --git a/lib/librte_acl/tb_mem.c b/lib/librte_acl/tb_mem.c new file mode 100644 index 00000000..157e6080 --- /dev/null +++ b/lib/librte_acl/tb_mem.c @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "tb_mem.h" + +/* + * Memory management routines for temporary memory. + * That memory is used only during build phase and is released after + * build is finished. + * Note, that tb_pool/tb_alloc() are not supposed to return NULL. + * Instead, in the case of failure to allocate memory, + * it would do siglongjmp(pool->fail). + * It is responsibility of the caller to save the proper context/environment, + * in the pool->fail before calling tb_alloc() for the given pool first time. + */ + +static struct tb_mem_block * +tb_pool(struct tb_mem_pool *pool, size_t sz) +{ + struct tb_mem_block *block; + uint8_t *ptr; + size_t size; + + size = sz + pool->alignment - 1; + block = calloc(1, size + sizeof(*pool->block)); + if (block == NULL) { + RTE_LOG(ERR, MALLOC, "%s(%zu)\n failed, currently allocated " + "by pool: %zu bytes\n", __func__, sz, pool->alloc); + siglongjmp(pool->fail, -ENOMEM); + return NULL; + } + + block->pool = pool; + + block->next = pool->block; + pool->block = block; + + pool->alloc += size; + + ptr = (uint8_t *)(block + 1); + block->mem = RTE_PTR_ALIGN_CEIL(ptr, pool->alignment); + block->size = size - (block->mem - ptr); + + return block; +} + +void * +tb_alloc(struct tb_mem_pool *pool, size_t size) +{ + struct tb_mem_block *block; + void *ptr; + size_t new_sz; + + size = RTE_ALIGN_CEIL(size, pool->alignment); + + block = pool->block; + if (block == NULL || block->size < size) { + new_sz = (size > pool->min_alloc) ? size : pool->min_alloc; + block = tb_pool(pool, new_sz); + } + ptr = block->mem; + block->size -= size; + block->mem += size; + return ptr; +} + +void +tb_free_pool(struct tb_mem_pool *pool) +{ + struct tb_mem_block *next, *block; + + for (block = pool->block; block != NULL; block = next) { + next = block->next; + free(block); + } + pool->block = NULL; + pool->alloc = 0; +} diff --git a/lib/librte_acl/tb_mem.h b/lib/librte_acl/tb_mem.h new file mode 100644 index 00000000..ca7af966 --- /dev/null +++ b/lib/librte_acl/tb_mem.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _TB_MEM_H_ +#define _TB_MEM_H_ + +/** + * @file + * + * RTE ACL temporary (build phase) memory management. + * Contains structures and functions to manage temporary (used by build only) + * memory. Memory allocated in large blocks to speed 'free' when trie is + * destructed (finish of build phase). + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +struct tb_mem_block { + struct tb_mem_block *next; + struct tb_mem_pool *pool; + size_t size; + uint8_t *mem; +}; + +struct tb_mem_pool { + struct tb_mem_block *block; + size_t alignment; + size_t min_alloc; + size_t alloc; + /* jump target in case of memory allocation failure. */ + sigjmp_buf fail; +}; + +void *tb_alloc(struct tb_mem_pool *pool, size_t size); +void tb_free_pool(struct tb_mem_pool *pool); + +#ifdef __cplusplus +} +#endif + +#endif /* _TB_MEM_H_ */ diff --git a/lib/librte_cfgfile/Makefile b/lib/librte_cfgfile/Makefile new file mode 100644 index 00000000..616aef09 --- /dev/null +++ b/lib/librte_cfgfile/Makefile @@ -0,0 +1,57 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_cfgfile.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_cfgfile_version.map + +LIBABIVER := 2 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_CFGFILE) += rte_cfgfile.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_CFGFILE)-include += rte_cfgfile.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_CFGFILE) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cfgfile/rte_cfgfile.c b/lib/librte_cfgfile/rte_cfgfile.c new file mode 100644 index 00000000..75625a28 --- /dev/null +++ b/lib/librte_cfgfile/rte_cfgfile.c @@ -0,0 +1,374 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include "rte_cfgfile.h" + +struct rte_cfgfile_section { + char name[CFG_NAME_LEN]; + int num_entries; + struct rte_cfgfile_entry *entries[0]; +}; + +struct rte_cfgfile { + int flags; + int num_sections; + struct rte_cfgfile_section *sections[0]; +}; + +/** when we resize a file structure, how many extra entries + * for new sections do we add in */ +#define CFG_ALLOC_SECTION_BATCH 8 +/** when we resize a section structure, how many extra entries + * for new entries do we add in */ +#define CFG_ALLOC_ENTRY_BATCH 16 + +static unsigned +_strip(char *str, unsigned len) +{ + int newlen = len; + if (len == 0) + return 0; + + if (isspace(str[len-1])) { + /* strip trailing whitespace */ + while (newlen > 0 && isspace(str[newlen - 1])) + str[--newlen] = '\0'; + } + + if (isspace(str[0])) { + /* strip leading whitespace */ + int i, start = 1; + while (isspace(str[start]) && start < newlen) + start++ + ; /* do nothing */ + newlen -= start; + for (i = 0; i < newlen; i++) + str[i] = str[i+start]; + str[i] = '\0'; + } + return newlen; +} + +struct rte_cfgfile * +rte_cfgfile_load(const char *filename, int flags) +{ + int allocated_sections = CFG_ALLOC_SECTION_BATCH; + int allocated_entries = 0; + int curr_section = -1; + int curr_entry = -1; + char buffer[256] = {0}; + int lineno = 0; + struct rte_cfgfile *cfg = NULL; + + FILE *f = fopen(filename, "r"); + if (f == NULL) + return NULL; + + cfg = malloc(sizeof(*cfg) + sizeof(cfg->sections[0]) * + allocated_sections); + if (cfg == NULL) + goto error2; + + memset(cfg->sections, 0, sizeof(cfg->sections[0]) * allocated_sections); + + while (fgets(buffer, sizeof(buffer), f) != NULL) { + char *pos = NULL; + size_t len = strnlen(buffer, sizeof(buffer)); + lineno++; + if ((len >= sizeof(buffer) - 1) && (buffer[len-1] != '\n')) { + printf("Error line %d - no \\n found on string. " + "Check if line too long\n", lineno); + goto error1; + } + pos = memchr(buffer, ';', sizeof(buffer)); + if (pos != NULL) { + *pos = '\0'; + len = pos - buffer; + } + + len = _strip(buffer, len); + if (buffer[0] != '[' && memchr(buffer, '=', len) == NULL) + continue; + + if (buffer[0] == '[') { + /* section heading line */ + char *end = memchr(buffer, ']', len); + if (end == NULL) { + printf("Error line %d - no terminating '['" + "character found\n", lineno); + goto error1; + } + *end = '\0'; + _strip(&buffer[1], end - &buffer[1]); + + /* close off old section and add start new one */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = + curr_entry + 1; + curr_section++; + + /* resize overall struct if we don't have room for more + sections */ + if (curr_section == allocated_sections) { + allocated_sections += CFG_ALLOC_SECTION_BATCH; + struct rte_cfgfile *n_cfg = realloc(cfg, + sizeof(*cfg) + sizeof(cfg->sections[0]) + * allocated_sections); + if (n_cfg == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + cfg = n_cfg; + } + + /* allocate space for new section */ + allocated_entries = CFG_ALLOC_ENTRY_BATCH; + curr_entry = -1; + cfg->sections[curr_section] = malloc( + sizeof(*cfg->sections[0]) + + sizeof(cfg->sections[0]->entries[0]) * + allocated_entries); + if (cfg->sections[curr_section] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + snprintf(cfg->sections[curr_section]->name, + sizeof(cfg->sections[0]->name), + "%s", &buffer[1]); + } else { + /* value line */ + if (curr_section < 0) { + printf("Error line %d - value outside of" + "section\n", lineno); + goto error1; + } + + struct rte_cfgfile_section *sect = + cfg->sections[curr_section]; + char *split[2]; + if (rte_strsplit(buffer, sizeof(buffer), split, 2, '=') + != 2) { + printf("Error at line %d - cannot split " + "string\n", lineno); + goto error1; + } + + curr_entry++; + if (curr_entry == allocated_entries) { + allocated_entries += CFG_ALLOC_ENTRY_BATCH; + struct rte_cfgfile_section *n_sect = realloc( + sect, sizeof(*sect) + + sizeof(sect->entries[0]) * + allocated_entries); + if (n_sect == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + sect = cfg->sections[curr_section] = n_sect; + } + + sect->entries[curr_entry] = malloc( + sizeof(*sect->entries[0])); + if (sect->entries[curr_entry] == NULL) { + printf("Error - no more memory\n"); + goto error1; + } + + struct rte_cfgfile_entry *entry = sect->entries[ + curr_entry]; + snprintf(entry->name, sizeof(entry->name), "%s", + split[0]); + snprintf(entry->value, sizeof(entry->value), "%s", + split[1]); + _strip(entry->name, strnlen(entry->name, + sizeof(entry->name))); + _strip(entry->value, strnlen(entry->value, + sizeof(entry->value))); + } + } + fclose(f); + cfg->flags = flags; + cfg->num_sections = curr_section + 1; + /* curr_section will still be -1 if we have an empty file */ + if (curr_section >= 0) + cfg->sections[curr_section]->num_entries = curr_entry + 1; + return cfg; + +error1: + rte_cfgfile_close(cfg); +error2: + fclose(f); + return NULL; +} + + +int rte_cfgfile_close(struct rte_cfgfile *cfg) +{ + int i, j; + + if (cfg == NULL) + return -1; + + for (i = 0; i < cfg->num_sections; i++) { + if (cfg->sections[i] != NULL) { + if (cfg->sections[i]->num_entries) { + for (j = 0; j < cfg->sections[i]->num_entries; + j++) { + if (cfg->sections[i]->entries[j] != + NULL) + free(cfg->sections[i]-> + entries[j]); + } + } + free(cfg->sections[i]); + } + } + free(cfg); + + return 0; +} + +int +rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sectionname, +size_t length) +{ + int i; + int num_sections = 0; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, length) == 0) + num_sections++; + } + return num_sections; +} + +int +rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], + int max_sections) +{ + int i; + + for (i = 0; i < cfg->num_sections && i < max_sections; i++) + snprintf(sections[i], CFG_NAME_LEN, "%s", + cfg->sections[i]->name); + + return i; +} + +static const struct rte_cfgfile_section * +_get_section(struct rte_cfgfile *cfg, const char *sectionname) +{ + int i; + for (i = 0; i < cfg->num_sections; i++) { + if (strncmp(cfg->sections[i]->name, sectionname, + sizeof(cfg->sections[0]->name)) == 0) + return cfg->sections[i]; + } + return NULL; +} + +int +rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname) +{ + return _get_section(cfg, sectionname) != NULL; +} + +int +rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, + const char *sectionname) +{ + const struct rte_cfgfile_section *s = _get_section(cfg, sectionname); + if (s == NULL) + return -1; + return s->num_entries; +} + + +int +rte_cfgfile_section_entries(struct rte_cfgfile *cfg, const char *sectionname, + struct rte_cfgfile_entry *entries, int max_entries) +{ + int i; + const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return -1; + for (i = 0; i < max_entries && i < sect->num_entries; i++) + entries[i] = *sect->entries[i]; + return i; +} + +int +rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, int index, + char *sectionname, + struct rte_cfgfile_entry *entries, int max_entries) +{ + int i; + const struct rte_cfgfile_section *sect; + + if (index < 0 || index >= cfg->num_sections) + return -1; + + sect = cfg->sections[index]; + snprintf(sectionname, CFG_NAME_LEN, "%s", sect->name); + for (i = 0; i < max_entries && i < sect->num_entries; i++) + entries[i] = *sect->entries[i]; + return i; +} + +const char * +rte_cfgfile_get_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname) +{ + int i; + const struct rte_cfgfile_section *sect = _get_section(cfg, sectionname); + if (sect == NULL) + return NULL; + for (i = 0; i < sect->num_entries; i++) + if (strncmp(sect->entries[i]->name, entryname, CFG_NAME_LEN) + == 0) + return sect->entries[i]->value; + return NULL; +} + +int +rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname) +{ + return rte_cfgfile_get_entry(cfg, sectionname, entryname) != NULL; +} diff --git a/lib/librte_cfgfile/rte_cfgfile.h b/lib/librte_cfgfile/rte_cfgfile.h new file mode 100644 index 00000000..834f8287 --- /dev/null +++ b/lib/librte_cfgfile/rte_cfgfile.h @@ -0,0 +1,239 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_CFGFILE_H__ +#define __INCLUDE_RTE_CFGFILE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* @file +* RTE Configuration File +* +* This library allows reading application defined parameters from standard +* format configuration file. +* +***/ + +#ifndef CFG_NAME_LEN +#define CFG_NAME_LEN 64 +#endif + +#ifndef CFG_VALUE_LEN +#define CFG_VALUE_LEN 256 +#endif + +/** Configuration file */ +struct rte_cfgfile; + +/** Configuration file entry */ +struct rte_cfgfile_entry { + char name[CFG_NAME_LEN]; /**< Name */ + char value[CFG_VALUE_LEN]; /**< Value */ +}; + +/** +* Open config file +* +* @param filename +* Config file name +* @param flags +* Config file flags, Reserved for future use. Must be set to 0. +* @return +* Handle to configuration file +*/ +struct rte_cfgfile *rte_cfgfile_load(const char *filename, int flags); + +/** +* Get number of sections in config file +* +* @param cfg +* Config file +* @param sec_name +* Section name +* @param length +* Maximum section name length +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_num_sections(struct rte_cfgfile *cfg, const char *sec_name, + size_t length); + +/** +* Get name of all config file sections. +* +* Fills in the array sections with the name of all the sections in the file +* (up to the number of max_sections sections). +* +* @param cfg +* Config file +* @param sections +* Array containing section names after successful invocation. Each elemen +* of this array should be preallocated by the user with at least +* CFG_NAME_LEN characters. +* @param max_sections +* Maximum number of section names to be stored in sections array +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_sections(struct rte_cfgfile *cfg, char *sections[], + int max_sections); + +/** +* Check if given section exists in config file +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @return +* TRUE (value different than 0) if section exists, FALSE (value 0) otherwise +*/ +int rte_cfgfile_has_section(struct rte_cfgfile *cfg, const char *sectionname); + +/** +* Get number of entries in given config file section +* +* If multiple sections have the given name this function operates on the +* first one. +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @return +* Number of entries in section +*/ +int rte_cfgfile_section_num_entries(struct rte_cfgfile *cfg, + const char *sectionname); + +/** Get section entries as key-value pairs +* +* If multiple sections have the given name this function operates on the +* first one. +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entries +* Pre-allocated array of at least max_entries entries where the section +* entries are stored as key-value pair after successful invocation +* @param max_entries +* Maximum number of section entries to be stored in entries array +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_section_entries(struct rte_cfgfile *cfg, + const char *sectionname, + struct rte_cfgfile_entry *entries, + int max_entries); + +/** Get section entries as key-value pairs +* +* The index of a section is the same as the index of its name in the +* result of rte_cfgfile_sections. This API can be used when there are +* multiple sections with the same name. +* +* @param cfg +* Config file +* @param index +* Section index +* @param sectionname +* Pre-allocated string of at least CFG_NAME_LEN characters where the +* section name is stored after successful invocation. +* @param entries +* Pre-allocated array of at least max_entries entries where the section +* entries are stored as key-value pair after successful invocation +* @param max_entries +* Maximum number of section entries to be stored in entries array +* @return +* Number of entries populated on success, negative error code otherwise +*/ +int rte_cfgfile_section_entries_by_index(struct rte_cfgfile *cfg, + int index, + char *sectionname, + struct rte_cfgfile_entry *entries, + int max_entries); + +/** Get value of the named entry in named config file section +* +* If multiple sections have the given name this function operates on the +* first one. +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entryname +* Entry name +* @return +* Entry value +*/ +const char *rte_cfgfile_get_entry(struct rte_cfgfile *cfg, + const char *sectionname, + const char *entryname); + +/** Check if given entry exists in named config file section +* +* If multiple sections have the given name this function operates on the +* first one. +* +* @param cfg +* Config file +* @param sectionname +* Section name +* @param entryname +* Entry name +* @return +* TRUE (value different than 0) if entry exists, FALSE (value 0) otherwise +*/ +int rte_cfgfile_has_entry(struct rte_cfgfile *cfg, const char *sectionname, + const char *entryname); + +/** Close config file +* +* @param cfg +* Config file +* @return +* 0 on success, error code otherwise +*/ +int rte_cfgfile_close(struct rte_cfgfile *cfg); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_cfgfile/rte_cfgfile_version.map b/lib/librte_cfgfile/rte_cfgfile_version.map new file mode 100644 index 00000000..3c2f0dbf --- /dev/null +++ b/lib/librte_cfgfile/rte_cfgfile_version.map @@ -0,0 +1,22 @@ +DPDK_2.0 { + global: + + rte_cfgfile_close; + rte_cfgfile_get_entry; + rte_cfgfile_has_entry; + rte_cfgfile_has_section; + rte_cfgfile_load; + rte_cfgfile_num_sections; + rte_cfgfile_section_entries; + rte_cfgfile_section_num_entries; + rte_cfgfile_sections; + + local: *; +}; + +DPDK_16.04 { + global: + + rte_cfgfile_section_entries_by_index; + +} DPDK_2.0; diff --git a/lib/librte_cmdline/Makefile b/lib/librte_cmdline/Makefile new file mode 100644 index 00000000..7d2d148c --- /dev/null +++ b/lib/librte_cmdline/Makefile @@ -0,0 +1,67 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_cmdline.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_cmdline_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) := cmdline.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_cirbuf.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_etheraddr.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_ipaddr.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_num.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_string.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_rdline.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_vt100.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_socket.c +SRCS-$(CONFIG_RTE_LIBRTE_CMDLINE) += cmdline_parse_portlist.c + +CFLAGS += -D_GNU_SOURCE + +# install includes +INCS := cmdline.h cmdline_parse.h cmdline_parse_num.h cmdline_parse_ipaddr.h +INCS += cmdline_parse_etheraddr.h cmdline_parse_string.h cmdline_rdline.h +INCS += cmdline_vt100.h cmdline_socket.h cmdline_cirbuf.h cmdline_parse_portlist.h +SYMLINK-$(CONFIG_RTE_LIBRTE_CMDLINE)-include := $(INCS) + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_CMDLINE) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cmdline/cmdline.c b/lib/librte_cmdline/cmdline.c new file mode 100644 index 00000000..c405878e --- /dev/null +++ b/lib/librte_cmdline/cmdline.c @@ -0,0 +1,301 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cmdline_parse.h" +#include "cmdline_rdline.h" +#include "cmdline.h" + +static void +cmdline_valid_buffer(struct rdline *rdl, const char *buf, + __attribute__((unused)) unsigned int size) +{ + struct cmdline *cl = rdl->opaque; + int ret; + ret = cmdline_parse(cl, buf); + if (ret == CMDLINE_PARSE_AMBIGUOUS) + cmdline_printf(cl, "Ambiguous command\n"); + else if (ret == CMDLINE_PARSE_NOMATCH) + cmdline_printf(cl, "Command not found\n"); + else if (ret == CMDLINE_PARSE_BAD_ARGS) + cmdline_printf(cl, "Bad arguments\n"); +} + +static int +cmdline_complete_buffer(struct rdline *rdl, const char *buf, + char *dstbuf, unsigned int dstsize, + int *state) +{ + struct cmdline *cl = rdl->opaque; + return cmdline_complete(cl, buf, state, dstbuf, dstsize); +} + +int +cmdline_write_char(struct rdline *rdl, char c) +{ + int ret = -1; + struct cmdline *cl; + + if (!rdl) + return -1; + + cl = rdl->opaque; + + if (cl->s_out >= 0) + ret = write(cl->s_out, &c, 1); + + return ret; +} + + +void +cmdline_set_prompt(struct cmdline *cl, const char *prompt) +{ + if (!cl || !prompt) + return; + snprintf(cl->prompt, sizeof(cl->prompt), "%s", prompt); +} + +struct cmdline * +cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out) +{ + struct cmdline *cl; + + if (!ctx || !prompt) + return NULL; + + cl = malloc(sizeof(struct cmdline)); + if (cl == NULL) + return NULL; + memset(cl, 0, sizeof(struct cmdline)); + cl->s_in = s_in; + cl->s_out = s_out; + cl->ctx = ctx; + + rdline_init(&cl->rdl, cmdline_write_char, + cmdline_valid_buffer, cmdline_complete_buffer); + cl->rdl.opaque = cl; + cmdline_set_prompt(cl, prompt); + rdline_newline(&cl->rdl, cl->prompt); + + return cl; +} + +void +cmdline_free(struct cmdline *cl) +{ + dprintf("called\n"); + + if (!cl) + return; + + if (cl->s_in > 2) + close(cl->s_in); + if (cl->s_out != cl->s_in && cl->s_out > 2) + close(cl->s_out); + free(cl); +} + +void +cmdline_printf(const struct cmdline *cl, const char *fmt, ...) +{ + va_list ap; + + if (!cl || !fmt) + return; + +#ifdef _GNU_SOURCE + if (cl->s_out < 0) + return; + va_start(ap, fmt); + vdprintf(cl->s_out, fmt, ap); + va_end(ap); +#else + int ret; + char *buf; + + if (cl->s_out < 0) + return; + + buf = malloc(BUFSIZ); + if (buf == NULL) + return; + va_start(ap, fmt); + ret = vsnprintf(buf, BUFSIZ, fmt, ap); + va_end(ap); + if (ret < 0) { + free(buf); + return; + } + if (ret >= BUFSIZ) + ret = BUFSIZ - 1; + write(cl->s_out, buf, ret); + free(buf); +#endif +} + +int +cmdline_in(struct cmdline *cl, const char *buf, int size) +{ + const char *history, *buffer; + size_t histlen, buflen; + int ret = 0; + int i, same; + + if (!cl || !buf) + return -1; + + for (i=0; irdl, buf[i]); + + if (ret == RDLINE_RES_VALIDATED) { + buffer = rdline_get_buffer(&cl->rdl); + history = rdline_get_history_item(&cl->rdl, 0); + if (history) { + histlen = strnlen(history, RDLINE_BUF_SIZE); + same = !memcmp(buffer, history, histlen) && + buffer[histlen] == '\n'; + } + else + same = 0; + buflen = strnlen(buffer, RDLINE_BUF_SIZE); + if (buflen > 1 && !same) + rdline_add_history(&cl->rdl, buffer); + rdline_newline(&cl->rdl, cl->prompt); + } + else if (ret == RDLINE_RES_EOF) + return -1; + else if (ret == RDLINE_RES_EXITED) + return -1; + } + return i; +} + +void +cmdline_quit(struct cmdline *cl) +{ + if (!cl) + return; + rdline_quit(&cl->rdl); +} + +int +cmdline_poll(struct cmdline *cl) +{ + struct pollfd pfd; + int status; + ssize_t read_status; + char c; + + if (!cl) + return -EINVAL; + else if (cl->rdl.status == RDLINE_EXITED) + return RDLINE_EXITED; + + pfd.fd = cl->s_in; + pfd.events = POLLIN; + pfd.revents = 0; + + status = poll(&pfd, 1, 0); + if (status < 0) + return status; + else if (status > 0) { + c = -1; + read_status = read(cl->s_in, &c, 1); + if (read_status < 0) + return read_status; + + status = cmdline_in(cl, &c, 1); + if (status < 0 && cl->rdl.status != RDLINE_EXITED) + return status; + } + + return cl->rdl.status; +} + +void +cmdline_interact(struct cmdline *cl) +{ + char c; + + if (!cl) + return; + + c = -1; + while (1) { + if (read(cl->s_in, &c, 1) <= 0) + break; + if (cmdline_in(cl, &c, 1) < 0) + break; + } +} diff --git a/lib/librte_cmdline/cmdline.h b/lib/librte_cmdline/cmdline.h new file mode 100644 index 00000000..2578ca81 --- /dev/null +++ b/lib/librte_cmdline/cmdline.h @@ -0,0 +1,115 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CMDLINE_H_ +#define _CMDLINE_H_ + +#include +#include + +/** + * @file + * + * Command line API + */ + +#ifdef __cplusplus +extern "C" { +#endif + +struct cmdline { + int s_in; + int s_out; + cmdline_parse_ctx_t *ctx; + struct rdline rdl; + char prompt[RDLINE_PROMPT_SIZE]; + struct termios oldterm; +}; + +struct cmdline *cmdline_new(cmdline_parse_ctx_t *ctx, const char *prompt, int s_in, int s_out); +void cmdline_set_prompt(struct cmdline *cl, const char *prompt); +void cmdline_free(struct cmdline *cl); +void cmdline_printf(const struct cmdline *cl, const char *fmt, ...) + __attribute__((format(printf,2,3))); +int cmdline_in(struct cmdline *cl, const char *buf, int size); +int cmdline_write_char(struct rdline *rdl, char c); + +/** + * This function is nonblocking equivalent of ``cmdline_interact()``. It polls + * *cl* for one character and interpret it. If return value is *RDLINE_EXITED* + * it mean that ``cmdline_quit()`` was invoked. + * + * @param cl + * The command line object. + * + * @return + * On success return object status - one of *enum rdline_status*. + * On error return negative value. + */ +int cmdline_poll(struct cmdline *cl); + +void cmdline_interact(struct cmdline *cl); +void cmdline_quit(struct cmdline *cl); + +#ifdef __cplusplus +} +#endif + +#endif /* _CMDLINE_SOCKET_H_ */ diff --git a/lib/librte_cmdline/cmdline_cirbuf.c b/lib/librte_cmdline/cmdline_cirbuf.c new file mode 100644 index 00000000..f506f880 --- /dev/null +++ b/lib/librte_cmdline/cmdline_cirbuf.c @@ -0,0 +1,466 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include "cmdline_cirbuf.h" + + +int +cirbuf_init(struct cirbuf *cbuf, char *buf, unsigned int start, unsigned int maxlen) +{ + if (!cbuf || !buf) + return -EINVAL; + cbuf->maxlen = maxlen; + cbuf->len = 0; + cbuf->start = start; + cbuf->end = start; + cbuf->buf = buf; + return 0; +} + +/* multiple add */ + +int +cirbuf_add_buf_head(struct cirbuf *cbuf, const char *c, unsigned int n) +{ + unsigned int e; + + if (!cbuf || !c || !n || n > CIRBUF_GET_FREELEN(cbuf)) + return -EINVAL; + + e = CIRBUF_IS_EMPTY(cbuf) ? 1 : 0; + + if (n < cbuf->start + e) { + dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->start - n + e, n); + memcpy(cbuf->buf + cbuf->start - n + e, c, n); + } + else { + dprintf("s[%d] -> d[%d] (%d)\n", + n - (cbuf->start + e), 0, + cbuf->start + e); + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->maxlen - n + + (cbuf->start + e), 0, n - (cbuf->start + e)); + memcpy(cbuf->buf, c + n - (cbuf->start + e) , cbuf->start + e); + memcpy(cbuf->buf + cbuf->maxlen - n + (cbuf->start + e), c, + n - (cbuf->start + e)); + } + cbuf->len += n; + cbuf->start += (cbuf->maxlen - n + e); + cbuf->start %= cbuf->maxlen; + return n; +} + +/* multiple add */ + +int +cirbuf_add_buf_tail(struct cirbuf *cbuf, const char *c, unsigned int n) +{ + unsigned int e; + + if (!cbuf || !c || !n || n > CIRBUF_GET_FREELEN(cbuf)) + return -EINVAL; + + e = CIRBUF_IS_EMPTY(cbuf) ? 1 : 0; + + if (n < cbuf->maxlen - cbuf->end - 1 + e) { + dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->end + !e, n); + memcpy(cbuf->buf + cbuf->end + !e, c, n); + } + else { + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->end + !e, 0, + cbuf->maxlen - cbuf->end - 1 + e); + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->maxlen - cbuf->end - 1 + + e, 0, n - cbuf->maxlen + cbuf->end + 1 - e); + memcpy(cbuf->buf + cbuf->end + !e, c, cbuf->maxlen - + cbuf->end - 1 + e); + memcpy(cbuf->buf, c + cbuf->maxlen - cbuf->end - 1 + e, + n - cbuf->maxlen + cbuf->end + 1 - e); + } + cbuf->len += n; + cbuf->end += n - e; + cbuf->end %= cbuf->maxlen; + return n; +} + +/* add at head */ + +static inline void +__cirbuf_add_head(struct cirbuf * cbuf, char c) +{ + if (!CIRBUF_IS_EMPTY(cbuf)) { + cbuf->start += (cbuf->maxlen - 1); + cbuf->start %= cbuf->maxlen; + } + cbuf->buf[cbuf->start] = c; + cbuf->len ++; +} + +int +cirbuf_add_head_safe(struct cirbuf * cbuf, char c) +{ + if (cbuf && !CIRBUF_IS_FULL(cbuf)) { + __cirbuf_add_head(cbuf, c); + return 0; + } + return -EINVAL; +} + +void +cirbuf_add_head(struct cirbuf * cbuf, char c) +{ + __cirbuf_add_head(cbuf, c); +} + +/* add at tail */ + +static inline void +__cirbuf_add_tail(struct cirbuf * cbuf, char c) +{ + if (!CIRBUF_IS_EMPTY(cbuf)) { + cbuf->end ++; + cbuf->end %= cbuf->maxlen; + } + cbuf->buf[cbuf->end] = c; + cbuf->len ++; +} + +int +cirbuf_add_tail_safe(struct cirbuf * cbuf, char c) +{ + if (cbuf && !CIRBUF_IS_FULL(cbuf)) { + __cirbuf_add_tail(cbuf, c); + return 0; + } + return -EINVAL; +} + +void +cirbuf_add_tail(struct cirbuf * cbuf, char c) +{ + __cirbuf_add_tail(cbuf, c); +} + + +static inline void +__cirbuf_shift_left(struct cirbuf *cbuf) +{ + unsigned int i; + char tmp = cbuf->buf[cbuf->start]; + + for (i=0 ; ilen ; i++) { + cbuf->buf[(cbuf->start+i)%cbuf->maxlen] = + cbuf->buf[(cbuf->start+i+1)%cbuf->maxlen]; + } + cbuf->buf[(cbuf->start-1+cbuf->maxlen)%cbuf->maxlen] = tmp; + cbuf->start += (cbuf->maxlen - 1); + cbuf->start %= cbuf->maxlen; + cbuf->end += (cbuf->maxlen - 1); + cbuf->end %= cbuf->maxlen; +} + +static inline void +__cirbuf_shift_right(struct cirbuf *cbuf) +{ + unsigned int i; + char tmp = cbuf->buf[cbuf->end]; + + for (i=0 ; ilen ; i++) { + cbuf->buf[(cbuf->end+cbuf->maxlen-i)%cbuf->maxlen] = + cbuf->buf[(cbuf->end+cbuf->maxlen-i-1)%cbuf->maxlen]; + } + cbuf->buf[(cbuf->end+1)%cbuf->maxlen] = tmp; + cbuf->start += 1; + cbuf->start %= cbuf->maxlen; + cbuf->end += 1; + cbuf->end %= cbuf->maxlen; +} + +/* XXX we could do a better algorithm here... */ +int +cirbuf_align_left(struct cirbuf * cbuf) +{ + if (!cbuf) + return -EINVAL; + + if (cbuf->start < cbuf->maxlen/2) { + while (cbuf->start != 0) { + __cirbuf_shift_left(cbuf); + } + } + else { + while (cbuf->start != 0) { + __cirbuf_shift_right(cbuf); + } + } + + return 0; +} + +/* XXX we could do a better algorithm here... */ +int +cirbuf_align_right(struct cirbuf * cbuf) +{ + if (!cbuf) + return -EINVAL; + + if (cbuf->start >= cbuf->maxlen/2) { + while (cbuf->end != cbuf->maxlen-1) { + __cirbuf_shift_left(cbuf); + } + } + else { + while (cbuf->start != cbuf->maxlen-1) { + __cirbuf_shift_right(cbuf); + } + } + + return 0; +} + +/* buffer del */ + +int +cirbuf_del_buf_head(struct cirbuf *cbuf, unsigned int size) +{ + if (!cbuf || !size || size > CIRBUF_GET_LEN(cbuf)) + return -EINVAL; + + cbuf->len -= size; + if (CIRBUF_IS_EMPTY(cbuf)) { + cbuf->start += size - 1; + cbuf->start %= cbuf->maxlen; + } + else { + cbuf->start += size; + cbuf->start %= cbuf->maxlen; + } + return 0; +} + +/* buffer del */ + +int +cirbuf_del_buf_tail(struct cirbuf *cbuf, unsigned int size) +{ + if (!cbuf || !size || size > CIRBUF_GET_LEN(cbuf)) + return -EINVAL; + + cbuf->len -= size; + if (CIRBUF_IS_EMPTY(cbuf)) { + cbuf->end += (cbuf->maxlen - size + 1); + cbuf->end %= cbuf->maxlen; + } + else { + cbuf->end += (cbuf->maxlen - size); + cbuf->end %= cbuf->maxlen; + } + return 0; +} + +/* del at head */ + +static inline void +__cirbuf_del_head(struct cirbuf * cbuf) +{ + cbuf->len --; + if (!CIRBUF_IS_EMPTY(cbuf)) { + cbuf->start ++; + cbuf->start %= cbuf->maxlen; + } +} + +int +cirbuf_del_head_safe(struct cirbuf * cbuf) +{ + if (cbuf && !CIRBUF_IS_EMPTY(cbuf)) { + __cirbuf_del_head(cbuf); + return 0; + } + return -EINVAL; +} + +void +cirbuf_del_head(struct cirbuf * cbuf) +{ + __cirbuf_del_head(cbuf); +} + +/* del at tail */ + +static inline void +__cirbuf_del_tail(struct cirbuf * cbuf) +{ + cbuf->len --; + if (!CIRBUF_IS_EMPTY(cbuf)) { + cbuf->end += (cbuf->maxlen - 1); + cbuf->end %= cbuf->maxlen; + } +} + +int +cirbuf_del_tail_safe(struct cirbuf * cbuf) +{ + if (cbuf && !CIRBUF_IS_EMPTY(cbuf)) { + __cirbuf_del_tail(cbuf); + return 0; + } + return -EINVAL; +} + +void +cirbuf_del_tail(struct cirbuf * cbuf) +{ + __cirbuf_del_tail(cbuf); +} + +/* convert to buffer */ + +int +cirbuf_get_buf_head(struct cirbuf *cbuf, char *c, unsigned int size) +{ + unsigned int n; + + if (!cbuf || !c) + return -EINVAL; + + n = (size < CIRBUF_GET_LEN(cbuf)) ? size : CIRBUF_GET_LEN(cbuf); + + if (!n) + return 0; + + if (cbuf->start <= cbuf->end) { + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->start, 0, n); + memcpy(c, cbuf->buf + cbuf->start , n); + } + else { + /* check if we need to go from end to the beginning */ + if (n <= cbuf->maxlen - cbuf->start) { + dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->start, n); + memcpy(c, cbuf->buf + cbuf->start , n); + } + else { + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->start, 0, + cbuf->maxlen - cbuf->start); + dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->maxlen - cbuf->start, + n - cbuf->maxlen + cbuf->start); + memcpy(c, cbuf->buf + cbuf->start , cbuf->maxlen - cbuf->start); + memcpy(c + cbuf->maxlen - cbuf->start, cbuf->buf, + n - cbuf->maxlen + cbuf->start); + } + } + return n; +} + +/* convert to buffer */ + +int +cirbuf_get_buf_tail(struct cirbuf *cbuf, char *c, unsigned int size) +{ + unsigned int n; + + if (!cbuf || !c) + return -EINVAL; + + n = (size < CIRBUF_GET_LEN(cbuf)) ? size : CIRBUF_GET_LEN(cbuf); + + if (!n) + return 0; + + if (cbuf->start <= cbuf->end) { + dprintf("s[%d] -> d[%d] (%d)\n", cbuf->end - n + 1, 0, n); + memcpy(c, cbuf->buf + cbuf->end - n + 1, n); + } + else { + /* check if we need to go from end to the beginning */ + if (n <= cbuf->end + 1) { + dprintf("s[%d] -> d[%d] (%d)\n", 0, cbuf->end - n + 1, n); + memcpy(c, cbuf->buf + cbuf->end - n + 1, n); + } + else { + dprintf("s[%d] -> d[%d] (%d)\n", 0, + cbuf->maxlen - cbuf->start, cbuf->end + 1); + dprintf("s[%d] -> d[%d] (%d)\n", + cbuf->maxlen - n + cbuf->end + 1, 0, n - cbuf->end - 1); + memcpy(c + cbuf->maxlen - cbuf->start, + cbuf->buf, cbuf->end + 1); + memcpy(c, cbuf->buf + cbuf->maxlen - n + cbuf->end +1, + n - cbuf->end - 1); + } + } + return n; +} + +/* get head or get tail */ + +char +cirbuf_get_head(struct cirbuf * cbuf) +{ + return cbuf->buf[cbuf->start]; +} + +/* get head or get tail */ + +char +cirbuf_get_tail(struct cirbuf * cbuf) +{ + return cbuf->buf[cbuf->end]; +} diff --git a/lib/librte_cmdline/cmdline_cirbuf.h b/lib/librte_cmdline/cmdline_cirbuf.h new file mode 100644 index 00000000..6321dec5 --- /dev/null +++ b/lib/librte_cmdline/cmdline_cirbuf.h @@ -0,0 +1,245 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CIRBUF_H_ +#define _CIRBUF_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * This structure is the header of a cirbuf type. + */ +struct cirbuf { + unsigned int maxlen; /**< total len of the fifo (number of elements) */ + unsigned int start; /**< indice of the first elt */ + unsigned int end; /**< indice of the last elt */ + unsigned int len; /**< current len of fifo */ + char *buf; +}; + +#ifdef RTE_LIBRTE_CMDLINE_DEBUG +#define dprintf_(fmt, ...) printf("line %3.3d - " fmt "%.0s", __LINE__, __VA_ARGS__) +#define dprintf(...) dprintf_(__VA_ARGS__, "dummy") +#else +#define dprintf(...) (void)0 +#endif + + +/** + * Init the circular buffer + */ +int cirbuf_init(struct cirbuf *cbuf, char *buf, unsigned int start, unsigned int maxlen); + + +/** + * Return 1 if the circular buffer is full + */ +#define CIRBUF_IS_FULL(cirbuf) ((cirbuf)->maxlen == (cirbuf)->len) + +/** + * Return 1 if the circular buffer is empty + */ +#define CIRBUF_IS_EMPTY(cirbuf) ((cirbuf)->len == 0) + +/** + * return current size of the circular buffer (number of used elements) + */ +#define CIRBUF_GET_LEN(cirbuf) ((cirbuf)->len) + +/** + * return size of the circular buffer (used + free elements) + */ +#define CIRBUF_GET_MAXLEN(cirbuf) ((cirbuf)->maxlen) + +/** + * return the number of free elts + */ +#define CIRBUF_GET_FREELEN(cirbuf) ((cirbuf)->maxlen - (cirbuf)->len) + +/** + * Iterator for a circular buffer + * c: struct cirbuf pointer + * i: an integer type internally used in the macro + * e: char that takes the value for each iteration + */ +#define CIRBUF_FOREACH(c, i, e) \ + for ( i=0, e=(c)->buf[(c)->start] ; \ + i<((c)->len) ; \ + i ++, e=(c)->buf[((c)->start+i)%((c)->maxlen)]) + + +/** + * Add a character at head of the circular buffer. Return 0 on success, or + * a negative value on error. + */ +int cirbuf_add_head_safe(struct cirbuf *cbuf, char c); + +/** + * Add a character at head of the circular buffer. You _must_ check that you + * have enough free space in the buffer before calling this func. + */ +void cirbuf_add_head(struct cirbuf *cbuf, char c); + +/** + * Add a character at tail of the circular buffer. Return 0 on success, or + * a negative value on error. + */ +int cirbuf_add_tail_safe(struct cirbuf *cbuf, char c); + +/** + * Add a character at tail of the circular buffer. You _must_ check that you + * have enough free space in the buffer before calling this func. + */ +void cirbuf_add_tail(struct cirbuf *cbuf, char c); + +/** + * Remove a char at the head of the circular buffer. Return 0 on + * success, or a negative value on error. + */ +int cirbuf_del_head_safe(struct cirbuf *cbuf); + +/** + * Remove a char at the head of the circular buffer. You _must_ check + * that buffer is not empty before calling the function. + */ +void cirbuf_del_head(struct cirbuf *cbuf); + +/** + * Remove a char at the tail of the circular buffer. Return 0 on + * success, or a negative value on error. + */ +int cirbuf_del_tail_safe(struct cirbuf *cbuf); + +/** + * Remove a char at the tail of the circular buffer. You _must_ check + * that buffer is not empty before calling the function. + */ +void cirbuf_del_tail(struct cirbuf *cbuf); + +/** + * Return the head of the circular buffer. You _must_ check that + * buffer is not empty before calling the function. + */ +char cirbuf_get_head(struct cirbuf *cbuf); + +/** + * Return the tail of the circular buffer. You _must_ check that + * buffer is not empty before calling the function. + */ +char cirbuf_get_tail(struct cirbuf *cbuf); + +/** + * Add a buffer at head of the circular buffer. 'c' is a pointer to a + * buffer, and n is the number of char to add. Return the number of + * copied bytes on success, or a negative value on error. + */ +int cirbuf_add_buf_head(struct cirbuf *cbuf, const char *c, unsigned int n); + +/** + * Add a buffer at tail of the circular buffer. 'c' is a pointer to a + * buffer, and n is the number of char to add. Return the number of + * copied bytes on success, or a negative value on error. + */ +int cirbuf_add_buf_tail(struct cirbuf *cbuf, const char *c, unsigned int n); + +/** + * Remove chars at the head of the circular buffer. Return 0 on + * success, or a negative value on error. + */ +int cirbuf_del_buf_head(struct cirbuf *cbuf, unsigned int size); + +/** + * Remove chars at the tail of the circular buffer. Return 0 on + * success, or a negative value on error. + */ +int cirbuf_del_buf_tail(struct cirbuf *cbuf, unsigned int size); + +/** + * Copy a maximum of 'size' characters from the head of the circular + * buffer to a flat one pointed by 'c'. Return the number of copied + * chars. + */ +int cirbuf_get_buf_head(struct cirbuf *cbuf, char *c, unsigned int size); + +/** + * Copy a maximum of 'size' characters from the tail of the circular + * buffer to a flat one pointed by 'c'. Return the number of copied + * chars. + */ +int cirbuf_get_buf_tail(struct cirbuf *cbuf, char *c, unsigned int size); + + +/** + * Set the start of the data to the index 0 of the internal buffer. + */ +int cirbuf_align_left(struct cirbuf *cbuf); + +/** + * Set the end of the data to the last index of the internal buffer. + */ +int cirbuf_align_right(struct cirbuf *cbuf); + +#ifdef __cplusplus +} +#endif + +#endif /* _CIRBUF_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse.c b/lib/librte_cmdline/cmdline_parse.c new file mode 100644 index 00000000..24a6ed67 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse.c @@ -0,0 +1,563 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include "cmdline_rdline.h" +#include "cmdline_parse.h" +#include "cmdline.h" + +#ifdef RTE_LIBRTE_CMDLINE_DEBUG +#define debug_printf printf +#else +#define debug_printf(args...) do {} while(0) +#endif + +#define CMDLINE_BUFFER_SIZE 64 + +/* isblank() needs _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE, so use our + * own. */ +static int +isblank2(char c) +{ + if (c == ' ' || + c == '\t' ) + return 1; + return 0; +} + +static int +isendofline(char c) +{ + if (c == '\n' || + c == '\r' ) + return 1; + return 0; +} + +static int +iscomment(char c) +{ + if (c == '#') + return 1; + return 0; +} + +int +cmdline_isendoftoken(char c) +{ + if (!c || iscomment(c) || isblank2(c) || isendofline(c)) + return 1; + return 0; +} + +static unsigned int +nb_common_chars(const char * s1, const char * s2) +{ + unsigned int i=0; + + while (*s1==*s2 && *s1) { + s1++; + s2++; + i++; + } + return i; +} + +/** + * try to match the buffer with an instruction (only the first + * nb_match_token tokens if != 0). Return 0 if we match all the + * tokens, else the number of matched tokens, else -1. + */ +static int +match_inst(cmdline_parse_inst_t *inst, const char *buf, + unsigned int nb_match_token, void *resbuf, unsigned resbuf_size) +{ + unsigned int token_num=0; + cmdline_parse_token_hdr_t * token_p; + unsigned int i=0; + int n = 0; + struct cmdline_token_hdr token_hdr; + + token_p = inst->tokens[token_num]; + if (token_p) + memcpy(&token_hdr, token_p, sizeof(token_hdr)); + + /* check if we match all tokens of inst */ + while (token_p && (!nb_match_token || iparse(token_p, buf, NULL, 0); + } else { + unsigned rb_sz; + + if (token_hdr.offset > resbuf_size) { + printf("Parse error(%s:%d): Token offset(%u) " + "exceeds maximum size(%u)\n", + __FILE__, __LINE__, + token_hdr.offset, resbuf_size); + return -ENOBUFS; + } + rb_sz = resbuf_size - token_hdr.offset; + + n = token_hdr.ops->parse(token_p, buf, (char *)resbuf + + token_hdr.offset, rb_sz); + } + + if (n < 0) + break; + + debug_printf("TK parsed (len=%d)\n", n); + i++; + buf += n; + + token_num ++; + token_p = inst->tokens[token_num]; + if (token_p) + memcpy(&token_hdr, token_p, sizeof(token_hdr)); + } + + /* does not match */ + if (i==0) + return -1; + + /* in case we want to match a specific num of token */ + if (nb_match_token) { + if (i == nb_match_token) { + return 0; + } + return i; + } + + /* we don't match all the tokens */ + if (token_p) { + return i; + } + + /* are there are some tokens more */ + while (isblank2(*buf)) { + buf++; + } + + /* end of buf */ + if ( isendofline(*buf) || iscomment(*buf) ) + return 0; + + /* garbage after inst */ + return i; +} + + +int +cmdline_parse(struct cmdline *cl, const char * buf) +{ + unsigned int inst_num=0; + cmdline_parse_inst_t *inst; + const char *curbuf; + char result_buf[CMDLINE_PARSE_RESULT_BUFSIZE]; + void (*f)(void *, struct cmdline *, void *) = NULL; + void *data = NULL; + int comment = 0; + int linelen = 0; + int parse_it = 0; + int err = CMDLINE_PARSE_NOMATCH; + int tok; + cmdline_parse_ctx_t *ctx; +#ifdef RTE_LIBRTE_CMDLINE_DEBUG + char debug_buf[BUFSIZ]; +#endif + + if (!cl || !buf) + return CMDLINE_PARSE_BAD_ARGS; + + ctx = cl->ctx; + + /* + * - look if the buffer contains at least one line + * - look if line contains only spaces or comments + * - count line length + */ + curbuf = buf; + while (! isendofline(*curbuf)) { + if ( *curbuf == '\0' ) { + debug_printf("Incomplete buf (len=%d)\n", linelen); + return 0; + } + if ( iscomment(*curbuf) ) { + comment = 1; + } + if ( ! isblank2(*curbuf) && ! comment) { + parse_it = 1; + } + curbuf++; + linelen++; + } + + /* skip all endofline chars */ + while (isendofline(buf[linelen])) { + linelen++; + } + + /* empty line */ + if ( parse_it == 0 ) { + debug_printf("Empty line (len=%d)\n", linelen); + return linelen; + } + +#ifdef RTE_LIBRTE_CMDLINE_DEBUG + snprintf(debug_buf, (linelen>64 ? 64 : linelen), "%s", buf); + debug_printf("Parse line : len=%d, <%s>\n", linelen, debug_buf); +#endif + + /* parse it !! */ + inst = ctx[inst_num]; + while (inst) { + debug_printf("INST %d\n", inst_num); + + /* fully parsed */ + tok = match_inst(inst, buf, 0, result_buf, sizeof(result_buf)); + + if (tok > 0) /* we matched at least one token */ + err = CMDLINE_PARSE_BAD_ARGS; + + else if (!tok) { + debug_printf("INST fully parsed\n"); + /* skip spaces */ + while (isblank2(*curbuf)) { + curbuf++; + } + + /* if end of buf -> there is no garbage after inst */ + if (isendofline(*curbuf) || iscomment(*curbuf)) { + if (!f) { + memcpy(&f, &inst->f, sizeof(f)); + memcpy(&data, &inst->data, sizeof(data)); + } + else { + /* more than 1 inst matches */ + err = CMDLINE_PARSE_AMBIGUOUS; + f=NULL; + debug_printf("Ambiguous cmd\n"); + break; + } + } + } + + inst_num ++; + inst = ctx[inst_num]; + } + + /* call func */ + if (f) { + f(result_buf, cl, data); + } + + /* no match */ + else { + debug_printf("No match err=%d\n", err); + return err; + } + + return linelen; +} + +int +cmdline_complete(struct cmdline *cl, const char *buf, int *state, + char *dst, unsigned int size) +{ + const char *partial_tok = buf; + unsigned int inst_num = 0; + cmdline_parse_inst_t *inst; + cmdline_parse_token_hdr_t *token_p; + struct cmdline_token_hdr token_hdr; + char tmpbuf[CMDLINE_BUFFER_SIZE], comp_buf[CMDLINE_BUFFER_SIZE]; + unsigned int partial_tok_len; + int comp_len = -1; + int tmp_len = -1; + int nb_token = 0; + unsigned int i, n; + int l; + unsigned int nb_completable; + unsigned int nb_non_completable; + int local_state = 0; + const char *help_str; + cmdline_parse_ctx_t *ctx; + + if (!cl || !buf || !state || !dst) + return -1; + + ctx = cl->ctx; + + debug_printf("%s called\n", __func__); + memset(&token_hdr, 0, sizeof(token_hdr)); + + /* count the number of complete token to parse */ + for (i=0 ; buf[i] ; i++) { + if (!isblank2(buf[i]) && isblank2(buf[i+1])) + nb_token++; + if (isblank2(buf[i]) && !isblank2(buf[i+1])) + partial_tok = buf+i+1; + } + partial_tok_len = strnlen(partial_tok, RDLINE_BUF_SIZE); + + /* first call -> do a first pass */ + if (*state <= 0) { + debug_printf("try complete <%s>\n", buf); + debug_printf("there is %d complete tokens, <%s> is incomplete\n", + nb_token, partial_tok); + + nb_completable = 0; + nb_non_completable = 0; + + inst = ctx[inst_num]; + while (inst) { + /* parse the first tokens of the inst */ + if (nb_token && match_inst(inst, buf, nb_token, NULL, 0)) + goto next; + + debug_printf("instruction match\n"); + token_p = inst->tokens[nb_token]; + if (token_p) + memcpy(&token_hdr, token_p, sizeof(token_hdr)); + + /* non completable */ + if (!token_p || + !token_hdr.ops->complete_get_nb || + !token_hdr.ops->complete_get_elt || + (n = token_hdr.ops->complete_get_nb(token_p)) == 0) { + nb_non_completable++; + goto next; + } + + debug_printf("%d choices for this token\n", n); + for (i=0 ; icomplete_get_elt(token_p, i, + tmpbuf, + sizeof(tmpbuf)) < 0) + continue; + + /* we have at least room for one char */ + tmp_len = strnlen(tmpbuf, sizeof(tmpbuf)); + if (tmp_len < CMDLINE_BUFFER_SIZE - 1) { + tmpbuf[tmp_len] = ' '; + tmpbuf[tmp_len+1] = 0; + } + + debug_printf(" choice <%s>\n", tmpbuf); + + /* does the completion match the + * beginning of the word ? */ + if (!strncmp(partial_tok, tmpbuf, + partial_tok_len)) { + if (comp_len == -1) { + snprintf(comp_buf, sizeof(comp_buf), + "%s", tmpbuf + partial_tok_len); + comp_len = + strnlen(tmpbuf + partial_tok_len, + sizeof(tmpbuf) - partial_tok_len); + + } + else { + comp_len = + nb_common_chars(comp_buf, + tmpbuf+partial_tok_len); + comp_buf[comp_len] = 0; + } + nb_completable++; + } + } + next: + debug_printf("next\n"); + inst_num ++; + inst = ctx[inst_num]; + } + + debug_printf("total choices %d for this completion\n", + nb_completable); + + /* no possible completion */ + if (nb_completable == 0 && nb_non_completable == 0) + return 0; + + /* if multichoice is not required */ + if (*state == 0 && partial_tok_len > 0) { + /* one or several choices starting with the + same chars */ + if (comp_len > 0) { + if ((unsigned)(comp_len + 1) > size) + return 0; + + snprintf(dst, size, "%s", comp_buf); + dst[comp_len] = 0; + return 2; + } + } + } + + /* init state correctly */ + if (*state == -1) + *state = 0; + + debug_printf("Multiple choice STATE=%d\n", *state); + + inst_num = 0; + inst = ctx[inst_num]; + while (inst) { + /* we need to redo it */ + inst = ctx[inst_num]; + + if (nb_token && match_inst(inst, buf, nb_token, NULL, 0)) + goto next2; + + token_p = inst->tokens[nb_token]; + if (token_p) + memcpy(&token_hdr, token_p, sizeof(token_hdr)); + + /* one choice for this token */ + if (!token_p || + !token_hdr.ops->complete_get_nb || + !token_hdr.ops->complete_get_elt || + (n = token_hdr.ops->complete_get_nb(token_p)) == 0) { + if (local_state < *state) { + local_state++; + goto next2; + } + (*state)++; + if (token_p && token_hdr.ops->get_help) { + token_hdr.ops->get_help(token_p, tmpbuf, + sizeof(tmpbuf)); + help_str = inst->help_str; + if (help_str) + snprintf(dst, size, "[%s]: %s", tmpbuf, + help_str); + else + snprintf(dst, size, "[%s]: No help", + tmpbuf); + } + else { + snprintf(dst, size, "[RETURN]"); + } + return 1; + } + + /* several choices */ + for (i=0 ; icomplete_get_elt(token_p, i, tmpbuf, + sizeof(tmpbuf)) < 0) + continue; + /* we have at least room for one char */ + tmp_len = strnlen(tmpbuf, sizeof(tmpbuf)); + if (tmp_len < CMDLINE_BUFFER_SIZE - 1) { + tmpbuf[tmp_len] = ' '; + tmpbuf[tmp_len + 1] = 0; + } + + debug_printf(" choice <%s>\n", tmpbuf); + + /* does the completion match the beginning of + * the word ? */ + if (!strncmp(partial_tok, tmpbuf, + partial_tok_len)) { + if (local_state < *state) { + local_state++; + continue; + } + (*state)++; + l=snprintf(dst, size, "%s", tmpbuf); + if (l>=0 && token_hdr.ops->get_help) { + token_hdr.ops->get_help(token_p, tmpbuf, + sizeof(tmpbuf)); + help_str = inst->help_str; + if (help_str) + snprintf(dst+l, size-l, "[%s]: %s", + tmpbuf, help_str); + else + snprintf(dst+l, size-l, + "[%s]: No help", tmpbuf); + } + + return 1; + } + } + next2: + inst_num ++; + inst = ctx[inst_num]; + } + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse.h b/lib/librte_cmdline/cmdline_parse.h new file mode 100644 index 00000000..4b25c456 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse.h @@ -0,0 +1,191 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CMDLINE_PARSE_H_ +#define _CMDLINE_PARSE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#ifndef offsetof +#define offsetof(type, field) ((size_t) &( ((type *)0)->field) ) +#endif + +/* return status for parsing */ +#define CMDLINE_PARSE_SUCCESS 0 +#define CMDLINE_PARSE_AMBIGUOUS -1 +#define CMDLINE_PARSE_NOMATCH -2 +#define CMDLINE_PARSE_BAD_ARGS -3 + +/* return status for completion */ +#define CMDLINE_PARSE_COMPLETE_FINISHED 0 +#define CMDLINE_PARSE_COMPLETE_AGAIN 1 +#define CMDLINE_PARSE_COMPLETED_BUFFER 2 + +/* maximum buffer size for parsed result */ +#define CMDLINE_PARSE_RESULT_BUFSIZE 8192 + +/** + * Stores a pointer to the ops struct, and the offset: the place to + * write the parsed result in the destination structure. + */ +struct cmdline_token_hdr { + struct cmdline_token_ops *ops; + unsigned int offset; +}; +typedef struct cmdline_token_hdr cmdline_parse_token_hdr_t; + +/** + * A token is defined by this structure. + * + * parse() takes the token as first argument, then the source buffer + * starting at the token we want to parse. The 3rd arg is a pointer + * where we store the parsed data (as binary). It returns the number of + * parsed chars on success and a negative value on error. + * + * complete_get_nb() returns the number of possible values for this + * token if completion is possible. If it is NULL or if it returns 0, + * no completion is possible. + * + * complete_get_elt() copy in dstbuf (the size is specified in the + * parameter) the i-th possible completion for this token. returns 0 + * on success or and a negative value on error. + * + * get_help() fills the dstbuf with the help for the token. It returns + * -1 on error and 0 on success. + */ +struct cmdline_token_ops { + /** parse(token ptr, buf, res pts, buf len) */ + int (*parse)(cmdline_parse_token_hdr_t *, const char *, void *, + unsigned int); + /** return the num of possible choices for this token */ + int (*complete_get_nb)(cmdline_parse_token_hdr_t *); + /** return the elt x for this token (token, idx, dstbuf, size) */ + int (*complete_get_elt)(cmdline_parse_token_hdr_t *, int, char *, + unsigned int); + /** get help for this token (token, dstbuf, size) */ + int (*get_help)(cmdline_parse_token_hdr_t *, char *, unsigned int); +}; + +struct cmdline; +/** + * Store a instruction, which is a pointer to a callback function and + * its parameter that is called when the instruction is parsed, a help + * string, and a list of token composing this instruction. + */ +struct cmdline_inst { + /* f(parsed_struct, data) */ + void (*f)(void *, struct cmdline *, void *); + void *data; + const char *help_str; + cmdline_parse_token_hdr_t *tokens[]; +}; +typedef struct cmdline_inst cmdline_parse_inst_t; + +/** + * A context is identified by its name, and contains a list of + * instruction + * + */ +typedef cmdline_parse_inst_t *cmdline_parse_ctx_t; + +/** + * Try to parse a buffer according to the specified context. The + * argument buf must ends with "\n\0". The function returns + * CMDLINE_PARSE_AMBIGUOUS, CMDLINE_PARSE_NOMATCH or + * CMDLINE_PARSE_BAD_ARGS on error. Else it calls the associated + * function (defined in the context) and returns 0 + * (CMDLINE_PARSE_SUCCESS). + */ +int cmdline_parse(struct cmdline *cl, const char *buf); + +/** + * complete() must be called with *state==0 (try to complete) or + * with *state==-1 (just display choices), then called without + * modifying *state until it returns CMDLINE_PARSE_COMPLETED_BUFFER or + * CMDLINE_PARSE_COMPLETED_BUFFER. + * + * It returns < 0 on error. + * + * Else it returns: + * - CMDLINE_PARSE_COMPLETED_BUFFER on completion (one possible + * choice). In this case, the chars are appended in dst buffer. + * - CMDLINE_PARSE_COMPLETE_AGAIN if there is several possible + * choices. In this case, you must call the function again, + * keeping the value of state intact. + * - CMDLINE_PARSE_COMPLETED_BUFFER when the iteration is + * finished. The dst is not valid for this last call. + * + * The returned dst buf ends with \0. + */ +int cmdline_complete(struct cmdline *cl, const char *buf, int *state, + char *dst, unsigned int size); + + +/* return true if(!c || iscomment(c) || isblank(c) || + * isendofline(c)) */ +int cmdline_isendoftoken(char c); + +#ifdef __cplusplus +} +#endif + +#endif /* _CMDLINE_PARSE_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.c b/lib/librte_cmdline/cmdline_parse_etheraddr.c new file mode 100644 index 00000000..dbfe4a61 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_etheraddr.c @@ -0,0 +1,180 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "cmdline_parse.h" +#include "cmdline_parse_etheraddr.h" + +struct cmdline_token_ops cmdline_token_etheraddr_ops = { + .parse = cmdline_parse_etheraddr, + .complete_get_nb = NULL, + .complete_get_elt = NULL, + .get_help = cmdline_get_help_etheraddr, +}; + +/* the format can be either XX:XX:XX:XX:XX:XX or XXXX:XXXX:XXXX */ +#define ETHER_ADDRSTRLENLONG 18 +#define ETHER_ADDRSTRLENSHORT 15 + +#ifdef __linux__ +#define ea_oct ether_addr_octet +#else +#define ea_oct octet +#endif + + +static struct ether_addr * +my_ether_aton(const char *a) +{ + int i; + char *end; + unsigned long o[ETHER_ADDR_LEN]; + static struct ether_addr ether_addr; + + i = 0; + do { + errno = 0; + o[i] = strtoul(a, &end, 16); + if (errno != 0 || end == a || (end[0] != ':' && end[0] != 0)) + return NULL; + a = end + 1; + } while (++i != sizeof (o) / sizeof (o[0]) && end[0] != 0); + + /* Junk at the end of line */ + if (end[0] != 0) + return NULL; + + /* Support the format XX:XX:XX:XX:XX:XX */ + if (i == ETHER_ADDR_LEN) { + while (i-- != 0) { + if (o[i] > UINT8_MAX) + return NULL; + ether_addr.ea_oct[i] = (uint8_t)o[i]; + } + /* Support the format XXXX:XXXX:XXXX */ + } else if (i == ETHER_ADDR_LEN / 2) { + while (i-- != 0) { + if (o[i] > UINT16_MAX) + return NULL; + ether_addr.ea_oct[i * 2] = (uint8_t)(o[i] >> 8); + ether_addr.ea_oct[i * 2 + 1] = (uint8_t)(o[i] & 0xff); + } + /* unknown format */ + } else + return NULL; + + return (struct ether_addr *)ðer_addr; +} + +int +cmdline_parse_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, + const char *buf, void *res, unsigned ressize) +{ + unsigned int token_len = 0; + char ether_str[ETHER_ADDRSTRLENLONG+1]; + struct ether_addr *tmp; + + if (res && ressize < sizeof(struct ether_addr)) + return -1; + + if (!buf || ! *buf) + return -1; + + while (!cmdline_isendoftoken(buf[token_len])) + token_len++; + + /* if token doesn't match possible string lengths... */ + if ((token_len != ETHER_ADDRSTRLENLONG - 1) && + (token_len != ETHER_ADDRSTRLENSHORT - 1)) + return -1; + + snprintf(ether_str, token_len+1, "%s", buf); + + tmp = my_ether_aton(ether_str); + if (tmp == NULL) + return -1; + if (res) + memcpy(res, tmp, sizeof(struct ether_addr)); + return token_len; +} + +int +cmdline_get_help_etheraddr(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, + char *dstbuf, unsigned int size) +{ + int ret; + + ret = snprintf(dstbuf, size, "Ethernet address"); + if (ret < 0) + return -1; + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse_etheraddr.h b/lib/librte_cmdline/cmdline_parse_etheraddr.h new file mode 100644 index 00000000..e539fb66 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_etheraddr.h @@ -0,0 +1,96 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_ETHERADDR_H_ +#define _PARSE_ETHERADDR_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct cmdline_token_etheraddr { + struct cmdline_token_hdr hdr; +}; +typedef struct cmdline_token_etheraddr cmdline_parse_token_etheraddr_t; + +extern struct cmdline_token_ops cmdline_token_etheraddr_ops; + +int cmdline_parse_etheraddr(cmdline_parse_token_hdr_t *tk, const char *srcbuf, + void *res, unsigned ressize); +int cmdline_get_help_etheraddr(cmdline_parse_token_hdr_t *tk, char *dstbuf, + unsigned int size); + +#define TOKEN_ETHERADDR_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_etheraddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _PARSE_ETHERADDR_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.c b/lib/librte_cmdline/cmdline_parse_ipaddr.c new file mode 100644 index 00000000..d3d3e044 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_ipaddr.c @@ -0,0 +1,408 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * For inet_ntop() functions: + * + * Copyright (c) 1996 by Internet Software Consortium. + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND INTERNET SOFTWARE CONSORTIUM DISCLAIMS + * ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL INTERNET SOFTWARE + * CONSORTIUM BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL + * DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR + * PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS + * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS + * SOFTWARE. + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef __linux__ +#ifndef __FreeBSD__ +#include +#else +#include +#endif +#endif + +#include + +#include "cmdline_parse.h" +#include "cmdline_parse_ipaddr.h" + +struct cmdline_token_ops cmdline_token_ipaddr_ops = { + .parse = cmdline_parse_ipaddr, + .complete_get_nb = NULL, + .complete_get_elt = NULL, + .get_help = cmdline_get_help_ipaddr, +}; + +#define INADDRSZ 4 +#define IN6ADDRSZ 16 +#define PREFIXMAX 128 +#define V4PREFIXMAX 32 + +/* + * WARNING: Don't even consider trying to compile this on a system where + * sizeof(int) < 4. sizeof(int) > 4 is fine; all the world's not a VAX. + */ + +static int inet_pton4(const char *src, unsigned char *dst); +static int inet_pton6(const char *src, unsigned char *dst); + +/* int + * inet_pton(af, src, dst) + * convert from presentation format (which usually means ASCII printable) + * to network format (which is usually some kind of binary format). + * return: + * 1 if the address was valid for the specified address family + * 0 if the address wasn't valid (`dst' is untouched in this case) + * -1 if some other error occurred (`dst' is untouched in this case, too) + * author: + * Paul Vixie, 1996. + */ +static int +my_inet_pton(int af, const char *src, void *dst) +{ + switch (af) { + case AF_INET: + return inet_pton4(src, dst); + case AF_INET6: + return inet_pton6(src, dst); + default: + errno = EAFNOSUPPORT; + return -1; + } + /* NOTREACHED */ +} + +/* int + * inet_pton4(src, dst) + * like inet_aton() but without all the hexadecimal and shorthand. + * return: + * 1 if `src' is a valid dotted quad, else 0. + * notice: + * does not touch `dst' unless it's returning 1. + * author: + * Paul Vixie, 1996. + */ +static int +inet_pton4(const char *src, unsigned char *dst) +{ + static const char digits[] = "0123456789"; + int saw_digit, octets, ch; + unsigned char tmp[INADDRSZ], *tp; + + saw_digit = 0; + octets = 0; + *(tp = tmp) = 0; + while ((ch = *src++) != '\0') { + const char *pch; + + if ((pch = strchr(digits, ch)) != NULL) { + unsigned int new = *tp * 10 + (pch - digits); + + if (new > 255) + return 0; + if (! saw_digit) { + if (++octets > 4) + return 0; + saw_digit = 1; + } + *tp = (unsigned char)new; + } else if (ch == '.' && saw_digit) { + if (octets == 4) + return 0; + *++tp = 0; + saw_digit = 0; + } else + return 0; + } + if (octets < 4) + return 0; + + memcpy(dst, tmp, INADDRSZ); + return 1; +} + +/* int + * inet_pton6(src, dst) + * convert presentation level address to network order binary form. + * return: + * 1 if `src' is a valid [RFC1884 2.2] address, else 0. + * notice: + * (1) does not touch `dst' unless it's returning 1. + * (2) :: in a full address is silently ignored. + * credit: + * inspired by Mark Andrews. + * author: + * Paul Vixie, 1996. + */ +static int +inet_pton6(const char *src, unsigned char *dst) +{ + static const char xdigits_l[] = "0123456789abcdef", + xdigits_u[] = "0123456789ABCDEF"; + unsigned char tmp[IN6ADDRSZ], *tp = 0, *endp = 0, *colonp = 0; + const char *xdigits = 0, *curtok = 0; + int ch = 0, saw_xdigit = 0, count_xdigit = 0; + unsigned int val = 0; + unsigned dbloct_count = 0; + + memset((tp = tmp), '\0', IN6ADDRSZ); + endp = tp + IN6ADDRSZ; + colonp = NULL; + /* Leading :: requires some special handling. */ + if (*src == ':') + if (*++src != ':') + return 0; + curtok = src; + saw_xdigit = count_xdigit = 0; + val = 0; + + while ((ch = *src++) != '\0') { + const char *pch; + + if ((pch = strchr((xdigits = xdigits_l), ch)) == NULL) + pch = strchr((xdigits = xdigits_u), ch); + if (pch != NULL) { + if (count_xdigit >= 4) + return 0; + val <<= 4; + val |= (pch - xdigits); + if (val > 0xffff) + return 0; + saw_xdigit = 1; + count_xdigit++; + continue; + } + if (ch == ':') { + curtok = src; + if (!saw_xdigit) { + if (colonp) + return 0; + colonp = tp; + continue; + } else if (*src == '\0') { + return 0; + } + if (tp + sizeof(int16_t) > endp) + return 0; + *tp++ = (unsigned char) ((val >> 8) & 0xff); + *tp++ = (unsigned char) (val & 0xff); + saw_xdigit = 0; + count_xdigit = 0; + val = 0; + dbloct_count++; + continue; + } + if (ch == '.' && ((tp + INADDRSZ) <= endp) && + inet_pton4(curtok, tp) > 0) { + tp += INADDRSZ; + saw_xdigit = 0; + dbloct_count += 2; + break; /* '\0' was seen by inet_pton4(). */ + } + return 0; + } + if (saw_xdigit) { + if (tp + sizeof(int16_t) > endp) + return 0; + *tp++ = (unsigned char) ((val >> 8) & 0xff); + *tp++ = (unsigned char) (val & 0xff); + dbloct_count++; + } + if (colonp != NULL) { + /* if we already have 8 double octets, having a colon means error */ + if (dbloct_count == 8) + return 0; + + /* + * Since some memmove()'s erroneously fail to handle + * overlapping regions, we'll do the shift by hand. + */ + const int n = tp - colonp; + int i; + + for (i = 1; i <= n; i++) { + endp[- i] = colonp[n - i]; + colonp[n - i] = 0; + } + tp = endp; + } + if (tp != endp) + return 0; + memcpy(dst, tmp, IN6ADDRSZ); + return 1; +} + +int +cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, + unsigned ressize) +{ + struct cmdline_token_ipaddr *tk2; + unsigned int token_len = 0; + char ip_str[INET6_ADDRSTRLEN+4+1]; /* '+4' is for prefixlen (if any) */ + cmdline_ipaddr_t ipaddr; + char *prefix, *prefix_end; + long prefixlen = 0; + + if (res && ressize < sizeof(cmdline_ipaddr_t)) + return -1; + + if (!buf || !tk || ! *buf) + return -1; + + tk2 = (struct cmdline_token_ipaddr *)tk; + + while (!cmdline_isendoftoken(buf[token_len])) + token_len++; + + /* if token is too big... */ + if (token_len >= INET6_ADDRSTRLEN+4) + return -1; + + snprintf(ip_str, token_len+1, "%s", buf); + + /* convert the network prefix */ + if (tk2->ipaddr_data.flags & CMDLINE_IPADDR_NETWORK) { + prefix = strrchr(ip_str, '/'); + if (prefix == NULL) + return -1; + *prefix = '\0'; + prefix ++; + errno = 0; + prefixlen = strtol(prefix, &prefix_end, 10); + if (errno || (*prefix_end != '\0') + || prefixlen < 0 || prefixlen > PREFIXMAX) + return -1; + ipaddr.prefixlen = prefixlen; + } + else { + ipaddr.prefixlen = 0; + } + + /* convert the IP addr */ + if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V4) && + my_inet_pton(AF_INET, ip_str, &ipaddr.addr.ipv4) == 1 && + prefixlen <= V4PREFIXMAX) { + ipaddr.family = AF_INET; + if (res) + memcpy(res, &ipaddr, sizeof(ipaddr)); + return token_len; + } + if ((tk2->ipaddr_data.flags & CMDLINE_IPADDR_V6) && + my_inet_pton(AF_INET6, ip_str, &ipaddr.addr.ipv6) == 1) { + ipaddr.family = AF_INET6; + if (res) + memcpy(res, &ipaddr, sizeof(ipaddr)); + return token_len; + } + return -1; + +} + +int cmdline_get_help_ipaddr(cmdline_parse_token_hdr_t *tk, char *dstbuf, + unsigned int size) +{ + struct cmdline_token_ipaddr *tk2; + + if (!tk || !dstbuf) + return -1; + + tk2 = (struct cmdline_token_ipaddr *)tk; + + switch (tk2->ipaddr_data.flags) { + case CMDLINE_IPADDR_V4: + snprintf(dstbuf, size, "IPv4"); + break; + case CMDLINE_IPADDR_V6: + snprintf(dstbuf, size, "IPv6"); + break; + case CMDLINE_IPADDR_V4|CMDLINE_IPADDR_V6: + snprintf(dstbuf, size, "IPv4/IPv6"); + break; + case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V4: + snprintf(dstbuf, size, "IPv4 network"); + break; + case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V6: + snprintf(dstbuf, size, "IPv6 network"); + break; + case CMDLINE_IPADDR_NETWORK|CMDLINE_IPADDR_V4|CMDLINE_IPADDR_V6: + snprintf(dstbuf, size, "IPv4/IPv6 network"); + break; + default: + snprintf(dstbuf, size, "IPaddr (bad flags)"); + break; + } + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse_ipaddr.h b/lib/librte_cmdline/cmdline_parse_ipaddr.h new file mode 100644 index 00000000..2b4266fd --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_ipaddr.h @@ -0,0 +1,189 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_IPADDR_H_ +#define _PARSE_IPADDR_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define CMDLINE_IPADDR_V4 0x01 +#define CMDLINE_IPADDR_V6 0x02 +#define CMDLINE_IPADDR_NETWORK 0x04 + +struct cmdline_ipaddr { + uint8_t family; + union { + struct in_addr ipv4; + struct in6_addr ipv6; + } addr; + unsigned int prefixlen; /* in case of network only */ +}; +typedef struct cmdline_ipaddr cmdline_ipaddr_t; + +struct cmdline_token_ipaddr_data { + uint8_t flags; +}; + +struct cmdline_token_ipaddr { + struct cmdline_token_hdr hdr; + struct cmdline_token_ipaddr_data ipaddr_data; +}; +typedef struct cmdline_token_ipaddr cmdline_parse_token_ipaddr_t; + +extern struct cmdline_token_ops cmdline_token_ipaddr_ops; + +int cmdline_parse_ipaddr(cmdline_parse_token_hdr_t *tk, const char *srcbuf, + void *res, unsigned ressize); +int cmdline_get_help_ipaddr(cmdline_parse_token_hdr_t *tk, char *dstbuf, + unsigned int size); + +#define TOKEN_IPADDR_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V4 | /* flags */ \ + CMDLINE_IPADDR_V6, \ + }, \ +} + +#define TOKEN_IPV4_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V4, /* flags */ \ + }, \ +} + +#define TOKEN_IPV6_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V6, /* flags */ \ + }, \ +} + +#define TOKEN_IPNET_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V4 | /* flags */ \ + CMDLINE_IPADDR_V6 | \ + CMDLINE_IPADDR_NETWORK, \ + }, \ +} + +#define TOKEN_IPV4NET_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V4 | /* flags */ \ + CMDLINE_IPADDR_NETWORK, \ + }, \ +} + +#define TOKEN_IPV6NET_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_ipaddr_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* ipaddr_data */ \ + { \ + CMDLINE_IPADDR_V4 | /* flags */ \ + CMDLINE_IPADDR_NETWORK, \ + }, \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _PARSE_IPADDR_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse_num.c b/lib/librte_cmdline/cmdline_parse_num.c new file mode 100644 index 00000000..b0f9a35d --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_num.c @@ -0,0 +1,402 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cmdline_parse.h" +#include "cmdline_parse_num.h" + +#ifdef RTE_LIBRTE_CMDLINE_DEBUG +#define debug_printf(args...) printf(args) +#else +#define debug_printf(args...) do {} while(0) +#endif + +struct cmdline_token_ops cmdline_token_num_ops = { + .parse = cmdline_parse_num, + .complete_get_nb = NULL, + .complete_get_elt = NULL, + .get_help = cmdline_get_help_num, +}; + + +enum num_parse_state_t { + START, + DEC_NEG, + BIN, + HEX, + + ERROR, + + FIRST_OK, /* not used */ + ZERO_OK, + HEX_OK, + OCTAL_OK, + BIN_OK, + DEC_NEG_OK, + DEC_POS_OK, +}; + +/* Keep it sync with enum in .h */ +static const char * num_help[] = { + "UINT8", "UINT16", "UINT32", "UINT64", + "INT8", "INT16", "INT32", "INT64", +}; + +static inline int +add_to_res(unsigned int c, uint64_t *res, unsigned int base) +{ + /* overflow */ + if ( (UINT64_MAX - c) / base < *res ) { + return -1; + } + + *res = (uint64_t) (*res * base + c); + return 0; +} + +static int +check_res_size(struct cmdline_token_num_data *nd, unsigned ressize) +{ + switch (nd->type) { + case INT8: + case UINT8: + if (ressize < sizeof(int8_t)) + return -1; + break; + case INT16: + case UINT16: + if (ressize < sizeof(int16_t)) + return -1; + break; + case INT32: + case UINT32: + if (ressize < sizeof(int32_t)) + return -1; + break; + case INT64: + case UINT64: + if (ressize < sizeof(int64_t)) + return -1; + break; + default: + return -1; + } + return 0; +} + +/* parse an int */ +int +cmdline_parse_num(cmdline_parse_token_hdr_t *tk, const char *srcbuf, void *res, + unsigned ressize) +{ + struct cmdline_token_num_data nd; + enum num_parse_state_t st = START; + const char * buf; + char c; + uint64_t res1 = 0; + + if (!tk) + return -1; + + if (!srcbuf || !*srcbuf) + return -1; + + buf = srcbuf; + c = *buf; + + memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd)); + + /* check that we have enough room in res */ + if (res) { + if (check_res_size(&nd, ressize) < 0) + return -1; + } + + while ( st != ERROR && c && ! cmdline_isendoftoken(c) ) { + debug_printf("%c %x -> ", c, c); + switch (st) { + case START: + if (c == '-') { + st = DEC_NEG; + } + else if (c == '0') { + st = ZERO_OK; + } + else if (c >= '1' && c <= '9') { + if (add_to_res(c - '0', &res1, 10) < 0) + st = ERROR; + else + st = DEC_POS_OK; + } + else { + st = ERROR; + } + break; + + case ZERO_OK: + if (c == 'x') { + st = HEX; + } + else if (c == 'b') { + st = BIN; + } + else if (c >= '0' && c <= '7') { + if (add_to_res(c - '0', &res1, 10) < 0) + st = ERROR; + else + st = OCTAL_OK; + } + else { + st = ERROR; + } + break; + + case DEC_NEG: + if (c >= '0' && c <= '9') { + if (add_to_res(c - '0', &res1, 10) < 0) + st = ERROR; + else + st = DEC_NEG_OK; + } + else { + st = ERROR; + } + break; + + case DEC_NEG_OK: + if (c >= '0' && c <= '9') { + if (add_to_res(c - '0', &res1, 10) < 0) + st = ERROR; + } + else { + st = ERROR; + } + break; + + case DEC_POS_OK: + if (c >= '0' && c <= '9') { + if (add_to_res(c - '0', &res1, 10) < 0) + st = ERROR; + } + else { + st = ERROR; + } + break; + + case HEX: + st = HEX_OK; + /* no break */ + case HEX_OK: + if (c >= '0' && c <= '9') { + if (add_to_res(c - '0', &res1, 16) < 0) + st = ERROR; + } + else if (c >= 'a' && c <= 'f') { + if (add_to_res(c - 'a' + 10, &res1, 16) < 0) + st = ERROR; + } + else if (c >= 'A' && c <= 'F') { + if (add_to_res(c - 'A' + 10, &res1, 16) < 0) + st = ERROR; + } + else { + st = ERROR; + } + break; + + + case OCTAL_OK: + if (c >= '0' && c <= '7') { + if (add_to_res(c - '0', &res1, 8) < 0) + st = ERROR; + } + else { + st = ERROR; + } + break; + + case BIN: + st = BIN_OK; + /* no break */ + case BIN_OK: + if (c >= '0' && c <= '1') { + if (add_to_res(c - '0', &res1, 2) < 0) + st = ERROR; + } + else { + st = ERROR; + } + break; + default: + debug_printf("not impl "); + + } + + debug_printf("(%"PRIu64")\n", res1); + + buf ++; + c = *buf; + + /* token too long */ + if (buf-srcbuf > 127) + return -1; + } + + switch (st) { + case ZERO_OK: + case DEC_POS_OK: + case HEX_OK: + case OCTAL_OK: + case BIN_OK: + if ( nd.type == INT8 && res1 <= INT8_MAX ) { + if (res) *(int8_t *)res = (int8_t) res1; + return buf-srcbuf; + } + else if ( nd.type == INT16 && res1 <= INT16_MAX ) { + if (res) *(int16_t *)res = (int16_t) res1; + return buf-srcbuf; + } + else if ( nd.type == INT32 && res1 <= INT32_MAX ) { + if (res) *(int32_t *)res = (int32_t) res1; + return buf-srcbuf; + } + else if ( nd.type == INT64 && res1 <= INT64_MAX ) { + if (res) *(int64_t *)res = (int64_t) res1; + return buf-srcbuf; + } + else if ( nd.type == UINT8 && res1 <= UINT8_MAX ) { + if (res) *(uint8_t *)res = (uint8_t) res1; + return buf-srcbuf; + } + else if (nd.type == UINT16 && res1 <= UINT16_MAX ) { + if (res) *(uint16_t *)res = (uint16_t) res1; + return buf-srcbuf; + } + else if ( nd.type == UINT32 && res1 <= UINT32_MAX ) { + if (res) *(uint32_t *)res = (uint32_t) res1; + return buf-srcbuf; + } + else if ( nd.type == UINT64 ) { + if (res) *(uint64_t *)res = res1; + return buf-srcbuf; + } + else { + return -1; + } + break; + + case DEC_NEG_OK: + if ( nd.type == INT8 && res1 <= INT8_MAX + 1 ) { + if (res) *(int8_t *)res = (int8_t) (-res1); + return buf-srcbuf; + } + else if ( nd.type == INT16 && res1 <= (uint16_t)INT16_MAX + 1 ) { + if (res) *(int16_t *)res = (int16_t) (-res1); + return buf-srcbuf; + } + else if ( nd.type == INT32 && res1 <= (uint32_t)INT32_MAX + 1 ) { + if (res) *(int32_t *)res = (int32_t) (-res1); + return buf-srcbuf; + } + else if ( nd.type == INT64 && res1 <= (uint64_t)INT64_MAX + 1 ) { + if (res) *(int64_t *)res = (int64_t) (-res1); + return buf-srcbuf; + } + else { + return -1; + } + break; + default: + debug_printf("error\n"); + return -1; + } +} + + +/* parse an int */ +int +cmdline_get_help_num(cmdline_parse_token_hdr_t *tk, char *dstbuf, unsigned int size) +{ + struct cmdline_token_num_data nd; + int ret; + + if (!tk) + return -1; + + memcpy(&nd, &((struct cmdline_token_num *)tk)->num_data, sizeof(nd)); + + /* should not happen.... don't so this test */ + /* if (nd.type >= (sizeof(num_help)/sizeof(const char *))) */ + /* return -1; */ + + ret = snprintf(dstbuf, size, "%s", num_help[nd.type]); + if (ret < 0) + return -1; + dstbuf[size-1] = '\0'; + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse_num.h b/lib/librte_cmdline/cmdline_parse_num.h new file mode 100644 index 00000000..2558cbf0 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_num.h @@ -0,0 +1,115 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_NUM_H_ +#define _PARSE_NUM_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +enum cmdline_numtype { + UINT8 = 0, + UINT16, + UINT32, + UINT64, + INT8, + INT16, + INT32, + INT64 +}; + +struct cmdline_token_num_data { + enum cmdline_numtype type; +}; + +struct cmdline_token_num { + struct cmdline_token_hdr hdr; + struct cmdline_token_num_data num_data; +}; +typedef struct cmdline_token_num cmdline_parse_token_num_t; + +extern struct cmdline_token_ops cmdline_token_num_ops; + +int cmdline_parse_num(cmdline_parse_token_hdr_t *tk, + const char *srcbuf, void *res, unsigned ressize); +int cmdline_get_help_num(cmdline_parse_token_hdr_t *tk, + char *dstbuf, unsigned int size); + +#define TOKEN_NUM_INITIALIZER(structure, field, numtype) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_num_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* num_data */ \ + { \ + numtype, /* type */ \ + }, \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _PARSE_NUM_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse_portlist.c b/lib/librte_cmdline/cmdline_parse_portlist.c new file mode 100644 index 00000000..f11bdf03 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_portlist.c @@ -0,0 +1,173 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2010, Keith Wiles + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include "cmdline_parse.h" +#include "cmdline_parse_portlist.h" + +struct cmdline_token_ops cmdline_token_portlist_ops = { + .parse = cmdline_parse_portlist, + .complete_get_nb = NULL, + .complete_get_elt = NULL, + .get_help = cmdline_get_help_portlist, +}; + +static void +parse_set_list(cmdline_portlist_t *pl, size_t low, size_t high) +{ + do { + pl->map |= (1 << low++); + } while (low <= high); +} + +static int +parse_ports(cmdline_portlist_t *pl, const char *str) +{ + size_t ps, pe; + const char *first, *last; + char *end; + + for (first = str, last = first; + first != NULL && last != NULL; + first = last + 1) { + + last = strchr(first, ','); + + errno = 0; + ps = strtoul(first, &end, 10); + if (errno != 0 || end == first || + (end[0] != '-' && end[0] != 0 && end != last)) + return -1; + + /* Support for N-M portlist format */ + if (end[0] == '-') { + errno = 0; + first = end + 1; + pe = strtoul(first, &end, 10); + if (errno != 0 || end == first || + (end[0] != 0 && end != last)) + return -1; + } else { + pe = ps; + } + + if (ps > pe || pe >= sizeof (pl->map) * 8) + return -1; + + parse_set_list(pl, ps, pe); + } + + return 0; +} + +int +cmdline_parse_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, + const char *buf, void *res, unsigned ressize) +{ + unsigned int token_len = 0; + char portlist_str[PORTLIST_TOKEN_SIZE+1]; + cmdline_portlist_t *pl; + + if (!buf || ! *buf) + return -1; + + if (res && ressize < sizeof(cmdline_portlist_t)) + return -1; + + pl = res; + + while (!cmdline_isendoftoken(buf[token_len]) && + (token_len < PORTLIST_TOKEN_SIZE)) + token_len++; + + if (token_len >= PORTLIST_TOKEN_SIZE) + return -1; + + snprintf(portlist_str, token_len+1, "%s", buf); + + if (pl) { + pl->map = 0; + if (strcmp("all", portlist_str) == 0) + pl->map = UINT32_MAX; + else if (parse_ports(pl, portlist_str) != 0) + return -1; + } + + return token_len; +} + +int +cmdline_get_help_portlist(__attribute__((unused)) cmdline_parse_token_hdr_t *tk, + char *dstbuf, unsigned int size) +{ + int ret; + ret = snprintf(dstbuf, size, "range of ports as 3,4-6,8-19,20"); + if (ret < 0) + return -1; + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse_portlist.h b/lib/librte_cmdline/cmdline_parse_portlist.h new file mode 100644 index 00000000..73d70e05 --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_portlist.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2010, Keith Wiles + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_PORTLIST_H_ +#define _PARSE_PORTLIST_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* size of a parsed string */ +#define PORTLIST_TOKEN_SIZE 128 +#define PORTLIST_MAX_TOKENS 32 + +typedef struct cmdline_portlist { + uint32_t map; +} cmdline_portlist_t; + +struct cmdline_token_portlist { + struct cmdline_token_hdr hdr; +}; +typedef struct cmdline_token_portlist cmdline_parse_token_portlist_t; + +extern struct cmdline_token_ops cmdline_token_portlist_ops; + +int cmdline_parse_portlist(cmdline_parse_token_hdr_t *tk, + const char *srcbuf, void *res, unsigned ressize); +int cmdline_get_help_portlist(cmdline_parse_token_hdr_t *tk, + char *dstbuf, unsigned int size); + +#define TOKEN_PORTLIST_INITIALIZER(structure, field) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_portlist_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _PARSE_PORTLIST_H_ */ diff --git a/lib/librte_cmdline/cmdline_parse_string.c b/lib/librte_cmdline/cmdline_parse_string.c new file mode 100644 index 00000000..45883b3e --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_string.c @@ -0,0 +1,253 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cmdline_parse.h" +#include "cmdline_parse_string.h" + +struct cmdline_token_ops cmdline_token_string_ops = { + .parse = cmdline_parse_string, + .complete_get_nb = cmdline_complete_get_nb_string, + .complete_get_elt = cmdline_complete_get_elt_string, + .get_help = cmdline_get_help_string, +}; + +#define MULTISTRING_HELP "Mul-choice STRING" +#define ANYSTRING_HELP "Any STRING" +#define FIXEDSTRING_HELP "Fixed STRING" + +static unsigned int +get_token_len(const char *s) +{ + char c; + unsigned int i=0; + + c = s[i]; + while (c!='#' && c!='\0') { + i++; + c = s[i]; + } + return i; +} + +static const char * +get_next_token(const char *s) +{ + unsigned int i; + i = get_token_len(s); + if (s[i] == '#') + return s+i+1; + return NULL; +} + +int +cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *buf, void *res, + unsigned ressize) +{ + struct cmdline_token_string *tk2; + struct cmdline_token_string_data *sd; + unsigned int token_len; + const char *str; + + if (res && ressize < STR_TOKEN_SIZE) + return -1; + + if (!tk || !buf || ! *buf) + return -1; + + tk2 = (struct cmdline_token_string *)tk; + + sd = &tk2->string_data; + + /* fixed string */ + if (sd->str) { + str = sd->str; + do { + token_len = get_token_len(str); + + /* if token is too big... */ + if (token_len >= STR_TOKEN_SIZE - 1) { + continue; + } + + if ( strncmp(buf, str, token_len) ) { + continue; + } + + if ( !cmdline_isendoftoken(*(buf+token_len)) ) { + continue; + } + + break; + } while ( (str = get_next_token(str)) != NULL ); + + if (!str) + return -1; + } + /* unspecified string */ + else { + token_len = 0; + while(!cmdline_isendoftoken(buf[token_len]) && + token_len < (STR_TOKEN_SIZE-1)) + token_len++; + + /* return if token too long */ + if (token_len >= STR_TOKEN_SIZE - 1) { + return -1; + } + } + + if (res) { + /* we are sure that token_len is < STR_TOKEN_SIZE-1 */ + snprintf(res, STR_TOKEN_SIZE, "%s", buf); + *((char *)res + token_len) = 0; + } + + + return token_len; +} + +int cmdline_complete_get_nb_string(cmdline_parse_token_hdr_t *tk) +{ + struct cmdline_token_string *tk2; + struct cmdline_token_string_data *sd; + const char *str; + int ret = 1; + + if (!tk) + return -1; + + tk2 = (struct cmdline_token_string *)tk; + sd = &tk2->string_data; + + if (!sd->str) + return 0; + + str = sd->str; + while( (str = get_next_token(str)) != NULL ) { + ret++; + } + return ret; +} + +int cmdline_complete_get_elt_string(cmdline_parse_token_hdr_t *tk, int idx, + char *dstbuf, unsigned int size) +{ + struct cmdline_token_string *tk2; + struct cmdline_token_string_data *sd; + const char *s; + unsigned int len; + + if (!tk || !dstbuf || idx < 0) + return -1; + + tk2 = (struct cmdline_token_string *)tk; + sd = &tk2->string_data; + + s = sd->str; + + while (idx-- && s) + s = get_next_token(s); + + if (!s) + return -1; + + len = get_token_len(s); + if (len > size - 1) + return -1; + + memcpy(dstbuf, s, len); + dstbuf[len] = '\0'; + return 0; +} + + +int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf, + unsigned int size) +{ + struct cmdline_token_string *tk2; + struct cmdline_token_string_data *sd; + const char *s; + + if (!tk || !dstbuf) + return -1; + + tk2 = (struct cmdline_token_string *)tk; + sd = &tk2->string_data; + + s = sd->str; + + if (s) { + if (get_next_token(s)) + snprintf(dstbuf, size, MULTISTRING_HELP); + else + snprintf(dstbuf, size, FIXEDSTRING_HELP); + } else + snprintf(dstbuf, size, ANYSTRING_HELP); + + return 0; +} diff --git a/lib/librte_cmdline/cmdline_parse_string.h b/lib/librte_cmdline/cmdline_parse_string.h new file mode 100644 index 00000000..94aa1f1b --- /dev/null +++ b/lib/librte_cmdline/cmdline_parse_string.h @@ -0,0 +1,112 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _PARSE_STRING_H_ +#define _PARSE_STRING_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* size of a parsed string */ +#define STR_TOKEN_SIZE 128 + +typedef char cmdline_fixed_string_t[STR_TOKEN_SIZE]; + +struct cmdline_token_string_data { + const char *str; +}; + +struct cmdline_token_string { + struct cmdline_token_hdr hdr; + struct cmdline_token_string_data string_data; +}; +typedef struct cmdline_token_string cmdline_parse_token_string_t; + +extern struct cmdline_token_ops cmdline_token_string_ops; + +int cmdline_parse_string(cmdline_parse_token_hdr_t *tk, const char *srcbuf, + void *res, unsigned ressize); +int cmdline_complete_get_nb_string(cmdline_parse_token_hdr_t *tk); +int cmdline_complete_get_elt_string(cmdline_parse_token_hdr_t *tk, int idx, + char *dstbuf, unsigned int size); +int cmdline_get_help_string(cmdline_parse_token_hdr_t *tk, char *dstbuf, + unsigned int size); + +#define TOKEN_STRING_INITIALIZER(structure, field, string) \ +{ \ + /* hdr */ \ + { \ + &cmdline_token_string_ops, /* ops */ \ + offsetof(structure, field), /* offset */ \ + }, \ + /* string_data */ \ + { \ + string, /* str */ \ + }, \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _PARSE_STRING_H_ */ diff --git a/lib/librte_cmdline/cmdline_rdline.c b/lib/librte_cmdline/cmdline_rdline.c new file mode 100644 index 00000000..1ef2258d --- /dev/null +++ b/lib/librte_cmdline/cmdline_rdline.c @@ -0,0 +1,697 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cmdline_cirbuf.h" +#include "cmdline_rdline.h" + +static void rdline_puts(struct rdline *rdl, const char *buf); +static void rdline_miniprintf(struct rdline *rdl, + const char *buf, unsigned int val); + +static void rdline_remove_old_history_item(struct rdline *rdl); +static void rdline_remove_first_history_item(struct rdline *rdl); +static unsigned int rdline_get_history_size(struct rdline *rdl); + + +/* isblank() needs _XOPEN_SOURCE >= 600 || _ISOC99_SOURCE, so use our + * own. */ +static int +isblank2(char c) +{ + if (c == ' ' || + c == '\t' ) + return 1; + return 0; +} + +int +rdline_init(struct rdline *rdl, + rdline_write_char_t *write_char, + rdline_validate_t *validate, + rdline_complete_t *complete) +{ + if (!rdl || !write_char || !validate || !complete) + return -EINVAL; + memset(rdl, 0, sizeof(*rdl)); + rdl->validate = validate; + rdl->complete = complete; + rdl->write_char = write_char; + rdl->status = RDLINE_INIT; + return cirbuf_init(&rdl->history, rdl->history_buf, 0, RDLINE_HISTORY_BUF_SIZE); +} + +void +rdline_newline(struct rdline *rdl, const char *prompt) +{ + unsigned int i; + + if (!rdl || !prompt) + return; + + vt100_init(&rdl->vt100); + cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE); + cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE); + + rdl->prompt_size = strnlen(prompt, RDLINE_PROMPT_SIZE-1); + if (prompt != rdl->prompt) + memcpy(rdl->prompt, prompt, rdl->prompt_size); + rdl->prompt[RDLINE_PROMPT_SIZE-1] = '\0'; + + for (i=0 ; iprompt_size ; i++) + rdl->write_char(rdl, rdl->prompt[i]); + rdl->status = RDLINE_RUNNING; + + rdl->history_cur_line = -1; +} + +void +rdline_stop(struct rdline *rdl) +{ + if (!rdl) + return; + rdl->status = RDLINE_INIT; +} + +void +rdline_quit(struct rdline *rdl) +{ + if (!rdl) + return; + rdl->status = RDLINE_EXITED; +} + +void +rdline_restart(struct rdline *rdl) +{ + if (!rdl) + return; + rdl->status = RDLINE_RUNNING; +} + +void +rdline_reset(struct rdline *rdl) +{ + if (!rdl) + return; + vt100_init(&rdl->vt100); + cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE); + cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE); + + rdl->status = RDLINE_RUNNING; + + rdl->history_cur_line = -1; +} + +const char * +rdline_get_buffer(struct rdline *rdl) +{ + if (!rdl) + return NULL; + unsigned int len_l, len_r; + cirbuf_align_left(&rdl->left); + cirbuf_align_left(&rdl->right); + + len_l = CIRBUF_GET_LEN(&rdl->left); + len_r = CIRBUF_GET_LEN(&rdl->right); + memcpy(rdl->left_buf+len_l, rdl->right_buf, len_r); + + rdl->left_buf[len_l + len_r] = '\n'; + rdl->left_buf[len_l + len_r + 1] = '\0'; + return rdl->left_buf; +} + +static void +display_right_buffer(struct rdline *rdl, int force) +{ + unsigned int i; + char tmp; + + if (!force && CIRBUF_IS_EMPTY(&rdl->right)) + return; + + rdline_puts(rdl, vt100_clear_right); + CIRBUF_FOREACH(&rdl->right, i, tmp) { + rdl->write_char(rdl, tmp); + } + if (!CIRBUF_IS_EMPTY(&rdl->right)) + rdline_miniprintf(rdl, vt100_multi_left, + CIRBUF_GET_LEN(&rdl->right)); +} + +void +rdline_redisplay(struct rdline *rdl) +{ + unsigned int i; + char tmp; + + if (!rdl) + return; + + rdline_puts(rdl, vt100_home); + for (i=0 ; iprompt_size ; i++) + rdl->write_char(rdl, rdl->prompt[i]); + CIRBUF_FOREACH(&rdl->left, i, tmp) { + rdl->write_char(rdl, tmp); + } + display_right_buffer(rdl, 1); +} + +int +rdline_char_in(struct rdline *rdl, char c) +{ + unsigned int i; + int cmd; + char tmp; + char *buf; + + if (!rdl) + return -EINVAL; + + if (rdl->status == RDLINE_EXITED) + return RDLINE_RES_EXITED; + if (rdl->status != RDLINE_RUNNING) + return RDLINE_RES_NOT_RUNNING; + + cmd = vt100_parser(&rdl->vt100, c); + if (cmd == -2) + return RDLINE_RES_SUCCESS; + + if (cmd >= 0) { + switch (cmd) { + /* move caret 1 char to the left */ + case CMDLINE_KEY_CTRL_B: + case CMDLINE_KEY_LEFT_ARR: + if (CIRBUF_IS_EMPTY(&rdl->left)) + break; + tmp = cirbuf_get_tail(&rdl->left); + cirbuf_del_tail(&rdl->left); + cirbuf_add_head(&rdl->right, tmp); + rdline_puts(rdl, vt100_left_arr); + break; + + /* move caret 1 char to the right */ + case CMDLINE_KEY_CTRL_F: + case CMDLINE_KEY_RIGHT_ARR: + if (CIRBUF_IS_EMPTY(&rdl->right)) + break; + tmp = cirbuf_get_head(&rdl->right); + cirbuf_del_head(&rdl->right); + cirbuf_add_tail(&rdl->left, tmp); + rdline_puts(rdl, vt100_right_arr); + break; + + /* move caret 1 word to the left */ + /* keyboard equivalent: Alt+B */ + case CMDLINE_KEY_WLEFT: + while (! CIRBUF_IS_EMPTY(&rdl->left) && + (tmp = cirbuf_get_tail(&rdl->left)) && + isblank2(tmp)) { + rdline_puts(rdl, vt100_left_arr); + cirbuf_del_tail(&rdl->left); + cirbuf_add_head(&rdl->right, tmp); + } + while (! CIRBUF_IS_EMPTY(&rdl->left) && + (tmp = cirbuf_get_tail(&rdl->left)) && + !isblank2(tmp)) { + rdline_puts(rdl, vt100_left_arr); + cirbuf_del_tail(&rdl->left); + cirbuf_add_head(&rdl->right, tmp); + } + break; + + /* move caret 1 word to the right */ + /* keyboard equivalent: Alt+F */ + case CMDLINE_KEY_WRIGHT: + while (! CIRBUF_IS_EMPTY(&rdl->right) && + (tmp = cirbuf_get_head(&rdl->right)) && + isblank2(tmp)) { + rdline_puts(rdl, vt100_right_arr); + cirbuf_del_head(&rdl->right); + cirbuf_add_tail(&rdl->left, tmp); + } + while (! CIRBUF_IS_EMPTY(&rdl->right) && + (tmp = cirbuf_get_head(&rdl->right)) && + !isblank2(tmp)) { + rdline_puts(rdl, vt100_right_arr); + cirbuf_del_head(&rdl->right); + cirbuf_add_tail(&rdl->left, tmp); + } + break; + + /* move caret to the left */ + case CMDLINE_KEY_CTRL_A: + if (CIRBUF_IS_EMPTY(&rdl->left)) + break; + rdline_miniprintf(rdl, vt100_multi_left, + CIRBUF_GET_LEN(&rdl->left)); + while (! CIRBUF_IS_EMPTY(&rdl->left)) { + tmp = cirbuf_get_tail(&rdl->left); + cirbuf_del_tail(&rdl->left); + cirbuf_add_head(&rdl->right, tmp); + } + break; + + /* move caret to the right */ + case CMDLINE_KEY_CTRL_E: + if (CIRBUF_IS_EMPTY(&rdl->right)) + break; + rdline_miniprintf(rdl, vt100_multi_right, + CIRBUF_GET_LEN(&rdl->right)); + while (! CIRBUF_IS_EMPTY(&rdl->right)) { + tmp = cirbuf_get_head(&rdl->right); + cirbuf_del_head(&rdl->right); + cirbuf_add_tail(&rdl->left, tmp); + } + break; + + /* delete 1 char from the left */ + case CMDLINE_KEY_BKSPACE: + if(!cirbuf_del_tail_safe(&rdl->left)) { + rdline_puts(rdl, vt100_bs); + display_right_buffer(rdl, 1); + } + break; + + /* delete 1 char from the right */ + case CMDLINE_KEY_SUPPR: + case CMDLINE_KEY_CTRL_D: + if (cmd == CMDLINE_KEY_CTRL_D && + CIRBUF_IS_EMPTY(&rdl->left) && + CIRBUF_IS_EMPTY(&rdl->right)) { + return RDLINE_RES_EOF; + } + if (!cirbuf_del_head_safe(&rdl->right)) { + display_right_buffer(rdl, 1); + } + break; + + /* delete 1 word from the left */ + case CMDLINE_KEY_META_BKSPACE: + case CMDLINE_KEY_CTRL_W: + while (! CIRBUF_IS_EMPTY(&rdl->left) && isblank2(cirbuf_get_tail(&rdl->left))) { + rdline_puts(rdl, vt100_bs); + cirbuf_del_tail(&rdl->left); + } + while (! CIRBUF_IS_EMPTY(&rdl->left) && !isblank2(cirbuf_get_tail(&rdl->left))) { + rdline_puts(rdl, vt100_bs); + cirbuf_del_tail(&rdl->left); + } + display_right_buffer(rdl, 1); + break; + + /* delete 1 word from the right */ + case CMDLINE_KEY_META_D: + while (! CIRBUF_IS_EMPTY(&rdl->right) && isblank2(cirbuf_get_head(&rdl->right))) + cirbuf_del_head(&rdl->right); + while (! CIRBUF_IS_EMPTY(&rdl->right) && !isblank2(cirbuf_get_head(&rdl->right))) + cirbuf_del_head(&rdl->right); + display_right_buffer(rdl, 1); + break; + + /* set kill buffer to contents on the right side of caret */ + case CMDLINE_KEY_CTRL_K: + cirbuf_get_buf_head(&rdl->right, rdl->kill_buf, RDLINE_BUF_SIZE); + rdl->kill_size = CIRBUF_GET_LEN(&rdl->right); + cirbuf_del_buf_head(&rdl->right, rdl->kill_size); + rdline_puts(rdl, vt100_clear_right); + break; + + /* paste contents of kill buffer to the left side of caret */ + case CMDLINE_KEY_CTRL_Y: + i=0; + while(CIRBUF_GET_LEN(&rdl->right) + CIRBUF_GET_LEN(&rdl->left) < + RDLINE_BUF_SIZE && + i < rdl->kill_size) { + cirbuf_add_tail(&rdl->left, rdl->kill_buf[i]); + rdl->write_char(rdl, rdl->kill_buf[i]); + i++; + } + display_right_buffer(rdl, 0); + break; + + /* clear and newline */ + case CMDLINE_KEY_CTRL_C: + rdline_puts(rdl, "\r\n"); + rdline_newline(rdl, rdl->prompt); + break; + + /* redisplay (helps when prompt is lost in other output) */ + case CMDLINE_KEY_CTRL_L: + rdline_redisplay(rdl); + break; + + /* autocomplete */ + case CMDLINE_KEY_TAB: + case CMDLINE_KEY_HELP: + cirbuf_align_left(&rdl->left); + rdl->left_buf[CIRBUF_GET_LEN(&rdl->left)] = '\0'; + if (rdl->complete) { + char tmp_buf[BUFSIZ]; + int complete_state; + int ret; + unsigned int tmp_size; + + if (cmd == CMDLINE_KEY_TAB) + complete_state = 0; + else + complete_state = -1; + + /* see in parse.h for help on complete() */ + ret = rdl->complete(rdl, rdl->left_buf, + tmp_buf, sizeof(tmp_buf), + &complete_state); + /* no completion or error */ + if (ret <= 0) { + return RDLINE_RES_COMPLETE; + } + + tmp_size = strnlen(tmp_buf, sizeof(tmp_buf)); + /* add chars */ + if (ret == RDLINE_RES_COMPLETE) { + i=0; + while(CIRBUF_GET_LEN(&rdl->right) + CIRBUF_GET_LEN(&rdl->left) < + RDLINE_BUF_SIZE && + i < tmp_size) { + cirbuf_add_tail(&rdl->left, tmp_buf[i]); + rdl->write_char(rdl, tmp_buf[i]); + i++; + } + display_right_buffer(rdl, 1); + return RDLINE_RES_COMPLETE; /* ?? */ + } + + /* choice */ + rdline_puts(rdl, "\r\n"); + while (ret) { + rdl->write_char(rdl, ' '); + for (i=0 ; tmp_buf[i] ; i++) + rdl->write_char(rdl, tmp_buf[i]); + rdline_puts(rdl, "\r\n"); + ret = rdl->complete(rdl, rdl->left_buf, + tmp_buf, sizeof(tmp_buf), + &complete_state); + } + + rdline_redisplay(rdl); + } + return RDLINE_RES_COMPLETE; + + /* complete buffer */ + case CMDLINE_KEY_RETURN: + case CMDLINE_KEY_RETURN2: + rdline_get_buffer(rdl); + rdl->status = RDLINE_INIT; + rdline_puts(rdl, "\r\n"); + if (rdl->history_cur_line != -1) + rdline_remove_first_history_item(rdl); + + if (rdl->validate) + rdl->validate(rdl, rdl->left_buf, CIRBUF_GET_LEN(&rdl->left)+2); + /* user may have stopped rdline */ + if (rdl->status == RDLINE_EXITED) + return RDLINE_RES_EXITED; + return RDLINE_RES_VALIDATED; + + /* previous element in history */ + case CMDLINE_KEY_UP_ARR: + case CMDLINE_KEY_CTRL_P: + if (rdl->history_cur_line == 0) { + rdline_remove_first_history_item(rdl); + } + if (rdl->history_cur_line <= 0) { + rdline_add_history(rdl, rdline_get_buffer(rdl)); + rdl->history_cur_line = 0; + } + + buf = rdline_get_history_item(rdl, rdl->history_cur_line + 1); + if (!buf) + break; + + rdl->history_cur_line ++; + vt100_init(&rdl->vt100); + cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE); + cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE); + cirbuf_add_buf_tail(&rdl->left, buf, strnlen(buf, RDLINE_BUF_SIZE)); + rdline_redisplay(rdl); + break; + + /* next element in history */ + case CMDLINE_KEY_DOWN_ARR: + case CMDLINE_KEY_CTRL_N: + if (rdl->history_cur_line - 1 < 0) + break; + + rdl->history_cur_line --; + buf = rdline_get_history_item(rdl, rdl->history_cur_line); + if (!buf) + break; + vt100_init(&rdl->vt100); + cirbuf_init(&rdl->left, rdl->left_buf, 0, RDLINE_BUF_SIZE); + cirbuf_init(&rdl->right, rdl->right_buf, 0, RDLINE_BUF_SIZE); + cirbuf_add_buf_tail(&rdl->left, buf, strnlen(buf, RDLINE_BUF_SIZE)); + rdline_redisplay(rdl); + + break; + + + default: + break; + } + + return RDLINE_RES_SUCCESS; + } + + if (!isprint((int)c)) + return RDLINE_RES_SUCCESS; + + /* standard chars */ + if (CIRBUF_GET_LEN(&rdl->left) + CIRBUF_GET_LEN(&rdl->right) >= RDLINE_BUF_SIZE) + return RDLINE_RES_SUCCESS; + + if (cirbuf_add_tail_safe(&rdl->left, c)) + return RDLINE_RES_SUCCESS; + + rdl->write_char(rdl, c); + display_right_buffer(rdl, 0); + + return RDLINE_RES_SUCCESS; +} + + +/* HISTORY */ + +static void +rdline_remove_old_history_item(struct rdline * rdl) +{ + char tmp; + + while (! CIRBUF_IS_EMPTY(&rdl->history) ) { + tmp = cirbuf_get_head(&rdl->history); + cirbuf_del_head(&rdl->history); + if (!tmp) + break; + } +} + +static void +rdline_remove_first_history_item(struct rdline * rdl) +{ + char tmp; + + if ( CIRBUF_IS_EMPTY(&rdl->history) ) { + return; + } + else { + cirbuf_del_tail(&rdl->history); + } + + while (! CIRBUF_IS_EMPTY(&rdl->history) ) { + tmp = cirbuf_get_tail(&rdl->history); + if (!tmp) + break; + cirbuf_del_tail(&rdl->history); + } +} + +static unsigned int +rdline_get_history_size(struct rdline * rdl) +{ + unsigned int i, tmp, ret=0; + + CIRBUF_FOREACH(&rdl->history, i, tmp) { + if (tmp == 0) + ret ++; + } + + return ret; +} + +char * +rdline_get_history_item(struct rdline * rdl, unsigned int idx) +{ + unsigned int len, i, tmp; + + if (!rdl) + return NULL; + + len = rdline_get_history_size(rdl); + if ( idx >= len ) { + return NULL; + } + + cirbuf_align_left(&rdl->history); + + CIRBUF_FOREACH(&rdl->history, i, tmp) { + if ( idx == len - 1) { + return rdl->history_buf + i; + } + if (tmp == 0) + len --; + } + + return NULL; +} + +int +rdline_add_history(struct rdline * rdl, const char * buf) +{ + unsigned int len, i; + + if (!rdl || !buf) + return -EINVAL; + + len = strnlen(buf, RDLINE_BUF_SIZE); + for (i=0; i= RDLINE_HISTORY_BUF_SIZE ) + return -1; + + while ( len >= CIRBUF_GET_FREELEN(&rdl->history) ) { + rdline_remove_old_history_item(rdl); + } + + cirbuf_add_buf_tail(&rdl->history, buf, len); + cirbuf_add_tail(&rdl->history, 0); + + return 0; +} + +void +rdline_clear_history(struct rdline * rdl) +{ + if (!rdl) + return; + cirbuf_init(&rdl->history, rdl->history_buf, 0, RDLINE_HISTORY_BUF_SIZE); +} + + +/* STATIC USEFUL FUNCS */ + +static void +rdline_puts(struct rdline * rdl, const char * buf) +{ + char c; + while ( (c = *(buf++)) != '\0' ) { + rdl->write_char(rdl, c); + } +} + +/* a very very basic printf with one arg and one format 'u' */ +static void +rdline_miniprintf(struct rdline *rdl, const char * buf, unsigned int val) +{ + char c, started=0, div=100; + + while ( (c=*(buf++)) ) { + if (c != '%') { + rdl->write_char(rdl, c); + continue; + } + c = *(buf++); + if (c != 'u') { + rdl->write_char(rdl, '%'); + rdl->write_char(rdl, c); + continue; + } + /* val is never more than 255 */ + while (div) { + c = (char)(val / div); + if (c || started) { + rdl->write_char(rdl, (char)(c+'0')); + started = 1; + } + val %= div; + div /= 10; + } + } +} diff --git a/lib/librte_cmdline/cmdline_rdline.h b/lib/librte_cmdline/cmdline_rdline.h new file mode 100644 index 00000000..72e2dad8 --- /dev/null +++ b/lib/librte_cmdline/cmdline_rdline.h @@ -0,0 +1,255 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RDLINE_H_ +#define _RDLINE_H_ + +/** + * This file is a small equivalent to the GNU readline library, but it + * was originally designed for small systems, like Atmel AVR + * microcontrollers (8 bits). Indeed, we don't use any malloc that is + * sometimes not implemented (or just not recommended) on such + * systems. + * + * Obviously, it does not support as many things as the GNU readline, + * but at least it supports some interesting features like a kill + * buffer and a command history. + * + * It also have a feature that does not have the GNU readline (as far + * as I know): we can have several instances of it running at the same + * time, even on a monothread program, since it works with callbacks. + * + * The lib is designed for a client-side or a server-side use: + * - server-side: the server receives all data from a socket, including + * control chars, like arrows, tabulations, ... The client is + * very simple, it can be a telnet or a minicom through a serial line. + * - client-side: the client receives its data through its stdin for + * instance. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* configuration */ +#define RDLINE_BUF_SIZE 512 +#define RDLINE_PROMPT_SIZE 32 +#define RDLINE_VT100_BUF_SIZE 8 +#define RDLINE_HISTORY_BUF_SIZE BUFSIZ +#define RDLINE_HISTORY_MAX_LINE 64 + +enum rdline_status { + RDLINE_INIT, + RDLINE_RUNNING, + RDLINE_EXITED +}; + +struct rdline; + +typedef int (rdline_write_char_t)(struct rdline *rdl, char); +typedef void (rdline_validate_t)(struct rdline *rdl, + const char *buf, unsigned int size); +typedef int (rdline_complete_t)(struct rdline *rdl, const char *buf, + char *dstbuf, unsigned int dstsize, + int *state); + +struct rdline { + enum rdline_status status; + /* rdline bufs */ + struct cirbuf left; + struct cirbuf right; + char left_buf[RDLINE_BUF_SIZE+2]; /* reserve 2 chars for the \n\0 */ + char right_buf[RDLINE_BUF_SIZE]; + + char prompt[RDLINE_PROMPT_SIZE]; + unsigned int prompt_size; + + char kill_buf[RDLINE_BUF_SIZE]; + unsigned int kill_size; + + /* history */ + struct cirbuf history; + char history_buf[RDLINE_HISTORY_BUF_SIZE]; + int history_cur_line; + + /* callbacks and func pointers */ + rdline_write_char_t *write_char; + rdline_validate_t *validate; + rdline_complete_t *complete; + + /* vt100 parser */ + struct cmdline_vt100 vt100; + + /* opaque pointer */ + void *opaque; +}; + +/** + * Init fields for a struct rdline. Call this only once at the beginning + * of your program. + * \param rdl A pointer to an uninitialized struct rdline + * \param write_char The function used by the function to write a character + * \param validate A pointer to the function to execute when the + * user validates the buffer. + * \param complete A pointer to the function to execute when the + * user completes the buffer. + */ +int rdline_init(struct rdline *rdl, + rdline_write_char_t *write_char, + rdline_validate_t *validate, + rdline_complete_t *complete); + + +/** + * Init the current buffer, and display a prompt. + * \param rdl A pointer to a struct rdline + * \param prompt A string containing the prompt + */ +void rdline_newline(struct rdline *rdl, const char *prompt); + +/** + * Call it and all received chars will be ignored. + * \param rdl A pointer to a struct rdline + */ +void rdline_stop(struct rdline *rdl); + +/** + * Same than rdline_stop() except that next calls to rdline_char_in() + * will return RDLINE_RES_EXITED. + * \param rdl A pointer to a struct rdline + */ +void rdline_quit(struct rdline *rdl); + +/** + * Restart after a call to rdline_stop() or rdline_quit() + * \param rdl A pointer to a struct rdline + */ +void rdline_restart(struct rdline *rdl); + +/** + * Redisplay the current buffer + * \param rdl A pointer to a struct rdline + */ +void rdline_redisplay(struct rdline *rdl); + +/** + * Reset the current buffer and setup for a new line. + * \param rdl A pointer to a struct rdline + */ +void rdline_reset(struct rdline *rdl); + + +/* return status for rdline_char_in() */ +#define RDLINE_RES_SUCCESS 0 +#define RDLINE_RES_VALIDATED 1 +#define RDLINE_RES_COMPLETE 2 +#define RDLINE_RES_NOT_RUNNING -1 +#define RDLINE_RES_EOF -2 +#define RDLINE_RES_EXITED -3 + +/** + * append a char to the readline buffer. + * Return RDLINE_RES_VALIDATE when the line has been validated. + * Return RDLINE_RES_COMPLETE when the user asked to complete the buffer. + * Return RDLINE_RES_NOT_RUNNING if it is not running. + * Return RDLINE_RES_EOF if EOF (ctrl-d on an empty line). + * Else return RDLINE_RES_SUCCESS. + * XXX error case when the buffer is full ? + * + * \param rdl A pointer to a struct rdline + * \param c The character to append + */ +int rdline_char_in(struct rdline *rdl, char c); + +/** + * Return the current buffer, terminated by '\0'. + * \param rdl A pointer to a struct rdline + */ +const char *rdline_get_buffer(struct rdline *rdl); + + +/** + * Add the buffer to history. + * return < 0 on error. + * \param rdl A pointer to a struct rdline + * \param buf A buffer that is terminated by '\0' + */ +int rdline_add_history(struct rdline *rdl, const char *buf); + +/** + * Clear current history + * \param rdl A pointer to a struct rdline + */ +void rdline_clear_history(struct rdline *rdl); + +/** + * Get the i-th history item + */ +char *rdline_get_history_item(struct rdline *rdl, unsigned int i); + +#ifdef __cplusplus +} +#endif + +#endif /* _RDLINE_H_ */ diff --git a/lib/librte_cmdline/cmdline_socket.c b/lib/librte_cmdline/cmdline_socket.c new file mode 100644 index 00000000..3fc243b7 --- /dev/null +++ b/lib/librte_cmdline/cmdline_socket.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cmdline_parse.h" +#include "cmdline_rdline.h" +#include "cmdline_socket.h" +#include "cmdline.h" + +struct cmdline * +cmdline_file_new(cmdline_parse_ctx_t *ctx, const char *prompt, const char *path) +{ + int fd; + + /* everything else is checked in cmdline_new() */ + if (!path) + return NULL; + + fd = open(path, O_RDONLY, 0); + if (fd < 0) { + dprintf("open() failed\n"); + return NULL; + } + return cmdline_new(ctx, prompt, fd, -1); +} + +struct cmdline * +cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt) +{ + struct cmdline *cl; + struct termios oldterm, term; + + tcgetattr(0, &oldterm); + memcpy(&term, &oldterm, sizeof(term)); + term.c_lflag &= ~(ICANON | ECHO | ISIG); + tcsetattr(0, TCSANOW, &term); + setbuf(stdin, NULL); + + cl = cmdline_new(ctx, prompt, 0, 1); + + if (cl) + memcpy(&cl->oldterm, &oldterm, sizeof(term)); + + return cl; +} + +void +cmdline_stdin_exit(struct cmdline *cl) +{ + if (!cl) + return; + + tcsetattr(fileno(stdin), TCSANOW, &cl->oldterm); +} diff --git a/lib/librte_cmdline/cmdline_socket.h b/lib/librte_cmdline/cmdline_socket.h new file mode 100644 index 00000000..8cc2dfbc --- /dev/null +++ b/lib/librte_cmdline/cmdline_socket.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CMDLINE_SOCKET_H_ +#define _CMDLINE_SOCKET_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +struct cmdline *cmdline_file_new(cmdline_parse_ctx_t *ctx, const char *prompt, const char *path); +struct cmdline *cmdline_stdin_new(cmdline_parse_ctx_t *ctx, const char *prompt); +void cmdline_stdin_exit(struct cmdline *cl); + +#ifdef __cplusplus +} +#endif + +#endif /* _CMDLINE_SOCKET_H_ */ diff --git a/lib/librte_cmdline/cmdline_vt100.c b/lib/librte_cmdline/cmdline_vt100.c new file mode 100644 index 00000000..a253e8b6 --- /dev/null +++ b/lib/librte_cmdline/cmdline_vt100.c @@ -0,0 +1,185 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "cmdline_vt100.h" + +const char *cmdline_vt100_commands[] = { + vt100_up_arr, + vt100_down_arr, + vt100_right_arr, + vt100_left_arr, + "\177", + "\n", + "\001", + "\005", + "\013", + "\031", + "\003", + "\006", + "\002", + vt100_suppr, + vt100_tab, + "\004", + "\014", + "\r", + "\033\177", + vt100_word_left, + vt100_word_right, + "?", + "\027", + "\020", + "\016", + "\033\144", +}; + +void +vt100_init(struct cmdline_vt100 *vt) +{ + if (!vt) + return; + vt->state = CMDLINE_VT100_INIT; +} + + +static int +match_command(char *buf, unsigned int size) +{ + const char *cmd; + size_t cmdlen; + unsigned int i = 0; + + for (i=0 ; ibufpos >= CMDLINE_VT100_BUF_SIZE) { + vt->state = CMDLINE_VT100_INIT; + vt->bufpos = 0; + } + + vt->buf[vt->bufpos++] = c; + size = vt->bufpos; + + switch (vt->state) { + case CMDLINE_VT100_INIT: + if (c == 033) { + vt->state = CMDLINE_VT100_ESCAPE; + } + else { + vt->bufpos = 0; + goto match_command; + } + break; + + case CMDLINE_VT100_ESCAPE: + if (c == 0133) { + vt->state = CMDLINE_VT100_ESCAPE_CSI; + } + else if (c >= 060 && c <= 0177) { /* XXX 0177 ? */ + vt->bufpos = 0; + vt->state = CMDLINE_VT100_INIT; + goto match_command; + } + break; + + case CMDLINE_VT100_ESCAPE_CSI: + if (c >= 0100 && c <= 0176) { + vt->bufpos = 0; + vt->state = CMDLINE_VT100_INIT; + goto match_command; + } + break; + + default: + vt->bufpos = 0; + break; + } + + return -2; + + match_command: + return match_command(vt->buf, size); +} diff --git a/lib/librte_cmdline/cmdline_vt100.h b/lib/librte_cmdline/cmdline_vt100.h new file mode 100644 index 00000000..963add8d --- /dev/null +++ b/lib/librte_cmdline/cmdline_vt100.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 2009, Olivier MATZ + * All rights reserved. + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * * Neither the name of the University of California, Berkeley nor the + * names of its contributors may be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _CMDLINE_VT100_H_ +#define _CMDLINE_VT100_H_ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define vt100_bell "\007" +#define vt100_bs "\010" +#define vt100_bs_clear "\010 \010" +#define vt100_tab "\011" +#define vt100_crnl "\012\015" +#define vt100_clear_right "\033[0K" +#define vt100_clear_left "\033[1K" +#define vt100_clear_down "\033[0J" +#define vt100_clear_up "\033[1J" +#define vt100_clear_line "\033[2K" +#define vt100_clear_screen "\033[2J" +#define vt100_up_arr "\033\133\101" +#define vt100_down_arr "\033\133\102" +#define vt100_right_arr "\033\133\103" +#define vt100_left_arr "\033\133\104" +#define vt100_multi_right "\033\133%uC" +#define vt100_multi_left "\033\133%uD" +#define vt100_suppr "\033\133\063\176" +#define vt100_home "\033M\033E" +#define vt100_word_left "\033\142" +#define vt100_word_right "\033\146" + +/* Result of parsing : it must be synchronized with + * cmdline_vt100_commands[] in vt100.c */ +#define CMDLINE_KEY_UP_ARR 0 +#define CMDLINE_KEY_DOWN_ARR 1 +#define CMDLINE_KEY_RIGHT_ARR 2 +#define CMDLINE_KEY_LEFT_ARR 3 +#define CMDLINE_KEY_BKSPACE 4 +#define CMDLINE_KEY_RETURN 5 +#define CMDLINE_KEY_CTRL_A 6 +#define CMDLINE_KEY_CTRL_E 7 +#define CMDLINE_KEY_CTRL_K 8 +#define CMDLINE_KEY_CTRL_Y 9 +#define CMDLINE_KEY_CTRL_C 10 +#define CMDLINE_KEY_CTRL_F 11 +#define CMDLINE_KEY_CTRL_B 12 +#define CMDLINE_KEY_SUPPR 13 +#define CMDLINE_KEY_TAB 14 +#define CMDLINE_KEY_CTRL_D 15 +#define CMDLINE_KEY_CTRL_L 16 +#define CMDLINE_KEY_RETURN2 17 +#define CMDLINE_KEY_META_BKSPACE 18 +#define CMDLINE_KEY_WLEFT 19 +#define CMDLINE_KEY_WRIGHT 20 +#define CMDLINE_KEY_HELP 21 +#define CMDLINE_KEY_CTRL_W 22 +#define CMDLINE_KEY_CTRL_P 23 +#define CMDLINE_KEY_CTRL_N 24 +#define CMDLINE_KEY_META_D 25 + +extern const char *cmdline_vt100_commands[]; + +enum cmdline_vt100_parser_state { + CMDLINE_VT100_INIT, + CMDLINE_VT100_ESCAPE, + CMDLINE_VT100_ESCAPE_CSI +}; + +#define CMDLINE_VT100_BUF_SIZE 8 +struct cmdline_vt100 { + uint8_t bufpos; + char buf[CMDLINE_VT100_BUF_SIZE]; + enum cmdline_vt100_parser_state state; +}; + +/** + * Init + */ +void vt100_init(struct cmdline_vt100 *vt); + +/** + * Input a new character. + * Return -1 if the character is not part of a control sequence + * Return -2 if c is not the last char of a control sequence + * Else return the index in vt100_commands[] + */ +int vt100_parser(struct cmdline_vt100 *vt, char c); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_cmdline/rte_cmdline_version.map b/lib/librte_cmdline/rte_cmdline_version.map new file mode 100644 index 00000000..c9fc18ab --- /dev/null +++ b/lib/librte_cmdline/rte_cmdline_version.map @@ -0,0 +1,78 @@ +DPDK_2.0 { + global: + + cirbuf_add_buf_head; + cirbuf_add_buf_tail; + cirbuf_add_head; + cirbuf_add_head_safe; + cirbuf_add_tail; + cirbuf_add_tail_safe; + cirbuf_align_left; + cirbuf_align_right; + cirbuf_del_buf_head; + cirbuf_del_buf_tail; + cirbuf_del_head; + cirbuf_del_head_safe; + cirbuf_del_tail; + cirbuf_del_tail_safe; + cirbuf_get_buf_head; + cirbuf_get_buf_tail; + cirbuf_get_head; + cirbuf_get_tail; + cirbuf_init; + cmdline_complete; + cmdline_complete_get_elt_string; + cmdline_complete_get_nb_string; + cmdline_file_new; + cmdline_free; + cmdline_get_help_etheraddr; + cmdline_get_help_ipaddr; + cmdline_get_help_num; + cmdline_get_help_portlist; + cmdline_get_help_string; + cmdline_in; + cmdline_interact; + cmdline_isendoftoken; + cmdline_new; + cmdline_parse; + cmdline_parse_etheraddr; + cmdline_parse_ipaddr; + cmdline_parse_num; + cmdline_parse_portlist; + cmdline_parse_string; + cmdline_printf; + cmdline_quit; + cmdline_set_prompt; + cmdline_stdin_exit; + cmdline_stdin_new; + cmdline_token_etheraddr_ops; + cmdline_token_ipaddr_ops; + cmdline_token_num_ops; + cmdline_token_portlist_ops; + cmdline_token_string_ops; + cmdline_token_string_ops; + cmdline_write_char; + rdline_add_history; + rdline_char_in; + rdline_clear_history; + rdline_get_buffer; + rdline_get_history_item; + rdline_init; + rdline_newline; + rdline_quit; + rdline_redisplay; + rdline_reset; + rdline_restart; + rdline_stop; + vt100_init; + vt100_parser; + + local: *; +}; + +DPDK_2.1 { + global: + + cmdline_poll; + +} DPDK_2.0; diff --git a/lib/librte_compat/Makefile b/lib/librte_compat/Makefile new file mode 100644 index 00000000..0c57533c --- /dev/null +++ b/lib/librte_compat/Makefile @@ -0,0 +1,40 @@ +# BSD LICENSE +# +# Copyright(c) 2013 Neil Horman +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + + +LIBABIVER := 1 + +# install includes +SYMLINK-y-include := rte_compat.h + +include $(RTE_SDK)/mk/rte.install.mk diff --git a/lib/librte_compat/rte_compat.h b/lib/librte_compat/rte_compat.h new file mode 100644 index 00000000..1c3c8d52 --- /dev/null +++ b/lib/librte_compat/rte_compat.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Neil Horman . + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMPAT_H_ +#define _RTE_COMPAT_H_ +#include + +#ifdef RTE_BUILD_SHARED_LIB + +/* + * Provides backwards compatibility when updating exported functions. + * When a symol is exported from a library to provide an API, it also provides a + * calling convention (ABI) that is embodied in its name, return type, + * arguments, etc. On occasion that function may need to change to accommodate + * new functionality, behavior, etc. When that occurs, it is desireable to + * allow for backwards compatibility for a time with older binaries that are + * dynamically linked to the dpdk. To support that, the __vsym and + * VERSION_SYMBOL macros are created. They, in conjunction with the + * _version.map file for a given library allow for multiple versions of + * a symbol to exist in a shared library so that older binaries need not be + * immediately recompiled. + * + * Refer to the guidelines document in the docs subdirectory for details on the + * use of these macros + */ + +/* + * Macro Parameters: + * b - function base name + * e - function version extension, to be concatenated with base name + * n - function symbol version string to be applied + * f - function prototype + * p - full function symbol name + */ + +/* + * VERSION_SYMBOL + * Creates a symbol version table entry binding symbol @DPDK_ to the internal + * function name _ + */ +#define VERSION_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@DPDK_" RTE_STR(n)) + +/* + * BIND_DEFAULT_SYMBOL + * Creates a symbol version entry instructing the linker to bind references to + * symbol to the internal symbol _ + */ +#define BIND_DEFAULT_SYMBOL(b, e, n) __asm__(".symver " RTE_STR(b) RTE_STR(e) ", " RTE_STR(b) "@@DPDK_" RTE_STR(n)) +#define __vsym __attribute__((used)) + +/* + * MAP_STATIC_SYMBOL + * If a function has been bifurcated into multiple versions, none of which + * are defined as the exported symbol name in the map file, this macro can be + * used to alias a specific version of the symbol to its exported name. For + * example, if you have 2 versions of a function foo_v1 and foo_v2, where the + * former is mapped to foo@DPDK_1 and the latter is mapped to foo@DPDK_2 when + * building a shared library, this macro can be used to map either foo_v1 or + * foo_v2 to the symbol foo when building a static library, e.g.: + * MAP_STATIC_SYMBOL(void foo(), foo_v2); + */ +#define MAP_STATIC_SYMBOL(f, p) + +#else +/* + * No symbol versioning in use + */ +#define VERSION_SYMBOL(b, e, n) +#define __vsym +#define BIND_DEFAULT_SYMBOL(b, e, n) +#define MAP_STATIC_SYMBOL(f, p) f __attribute__((alias(RTE_STR(p)))) +/* + * RTE_BUILD_SHARED_LIB=n + */ +#endif + + +#endif /* _RTE_COMPAT_H_ */ diff --git a/lib/librte_cryptodev/Makefile b/lib/librte_cryptodev/Makefile new file mode 100644 index 00000000..314a0466 --- /dev/null +++ b/lib/librte_cryptodev/Makefile @@ -0,0 +1,62 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_cryptodev.a + +# library version +LIBABIVER := 1 + +# build flags +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +# library source files +SRCS-y += rte_cryptodev.c + +# export include files +SYMLINK-y-include += rte_crypto.h +SYMLINK-y-include += rte_crypto_sym.h +SYMLINK-y-include += rte_cryptodev.h +SYMLINK-y-include += rte_cryptodev_pmd.h + +# versioning export map +EXPORT_MAP := rte_cryptodev_version.map + +# library dependencies +DEPDIRS-y += lib/librte_eal +DEPDIRS-y += lib/librte_mempool +DEPDIRS-y += lib/librte_ring +DEPDIRS-y += lib/librte_mbuf +DEPDIRS-y += lib/librte_kvargs + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_cryptodev/rte_crypto.h b/lib/librte_cryptodev/rte_crypto.h new file mode 100644 index 00000000..5bc3eaa7 --- /dev/null +++ b/lib/librte_cryptodev/rte_crypto.h @@ -0,0 +1,415 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTO_H_ +#define _RTE_CRYPTO_H_ + +/** + * @file rte_crypto.h + * + * RTE Cryptography Common Definitions + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#include +#include +#include + +#include "rte_crypto_sym.h" + +/** Crypto operation types */ +enum rte_crypto_op_type { + RTE_CRYPTO_OP_TYPE_UNDEFINED, + /**< Undefined operation type */ + RTE_CRYPTO_OP_TYPE_SYMMETRIC, + /**< Symmetric operation */ +}; + +/** Status of crypto operation */ +enum rte_crypto_op_status { + RTE_CRYPTO_OP_STATUS_SUCCESS, + /**< Operation completed successfully */ + RTE_CRYPTO_OP_STATUS_NOT_PROCESSED, + /**< Operation has not yet been processed by a crypto device */ + RTE_CRYPTO_OP_STATUS_ENQUEUED, + /**< Operation is enqueued on device */ + RTE_CRYPTO_OP_STATUS_AUTH_FAILED, + /**< Authentication verification failed */ + RTE_CRYPTO_OP_STATUS_INVALID_SESSION, + /**< + * Symmetric operation failed due to invalid session arguments, or if + * in session-less mode, failed to allocate private operation material. + */ + RTE_CRYPTO_OP_STATUS_INVALID_ARGS, + /**< Operation failed due to invalid arguments in request */ + RTE_CRYPTO_OP_STATUS_ERROR, + /**< Error handling operation */ +}; + +/** + * Cryptographic Operation. + * + * This structure contains data relating to performing cryptographic + * operations. This operation structure is used to contain any operation which + * is supported by the cryptodev API, PMDs should check the type parameter to + * verify that the operation is a support function of the device. Crypto + * operations are enqueued and dequeued in crypto PMDs using the + * rte_cryptodev_enqueue_burst() / rte_cryptodev_dequeue_burst() . + */ +struct rte_crypto_op { + enum rte_crypto_op_type type; + /**< operation type */ + + enum rte_crypto_op_status status; + /**< + * operation status - this is reset to + * RTE_CRYPTO_OP_STATUS_NOT_PROCESSED on allocation from mempool and + * will be set to RTE_CRYPTO_OP_STATUS_SUCCESS after crypto operation + * is successfully processed by a crypto PMD + */ + + struct rte_mempool *mempool; + /**< crypto operation mempool which operation is allocated from */ + + phys_addr_t phys_addr; + /**< physical address of crypto operation */ + + void *opaque_data; + /**< Opaque pointer for user data */ + + union { + struct rte_crypto_sym_op *sym; + /**< Symmetric operation parameters */ + }; /**< operation specific parameters */ +} __rte_cache_aligned; + +/** + * Reset the fields of a crypto operation to their default values. + * + * @param op The crypto operation to be reset. + * @param type The crypto operation type. + */ +static inline void +__rte_crypto_op_reset(struct rte_crypto_op *op, enum rte_crypto_op_type type) +{ + op->type = type; + op->status = RTE_CRYPTO_OP_STATUS_NOT_PROCESSED; + + switch (type) { + case RTE_CRYPTO_OP_TYPE_SYMMETRIC: + /** Symmetric operation structure starts after the end of the + * rte_crypto_op structure. + */ + op->sym = (struct rte_crypto_sym_op *)(op + 1); + op->type = type; + + __rte_crypto_sym_op_reset(op->sym); + break; + default: + break; + } + + op->opaque_data = NULL; +} + +/** + * Private data structure belonging to a crypto symmetric operation pool. + */ +struct rte_crypto_op_pool_private { + enum rte_crypto_op_type type; + /**< Crypto op pool type operation. */ + uint16_t priv_size; + /**< Size of private area in each crypto operation. */ +}; + + +/** + * Returns the size of private data allocated with each rte_crypto_op object by + * the mempool + * + * @param mempool rte_crypto_op mempool + * + * @return private data size + */ +static inline uint16_t +__rte_crypto_op_get_priv_data_size(struct rte_mempool *mempool) +{ + struct rte_crypto_op_pool_private *priv = + (struct rte_crypto_op_pool_private *) rte_mempool_get_priv(mempool); + + return priv->priv_size; +} + + +/** + * Creates a crypto operation pool + * + * @param name pool name + * @param type crypto operation type, use + * RTE_CRYPTO_OP_TYPE_UNDEFINED for a pool which + * supports all operation types + * @param nb_elts number of elements in pool + * @param cache_size Number of elements to cache on lcore, see + * *rte_mempool_create* for further details about + * cache size + * @param priv_size Size of private data to allocate with each + * operation + * @param socket_id Socket to allocate memory on + * + * @return + * - On success pointer to mempool + * - On failure NULL + */ +extern struct rte_mempool * +rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type, + unsigned nb_elts, unsigned cache_size, uint16_t priv_size, + int socket_id); + +/** + * Bulk allocate raw element from mempool and return as crypto operations + * + * @param mempool crypto operation mempool. + * @param type crypto operation type. + * @param ops Array to place allocated crypto operations + * @param nb_ops Number of crypto operations to allocate + * + * @returns + * - On success returns number of ops allocated + */ +static inline int +__rte_crypto_op_raw_bulk_alloc(struct rte_mempool *mempool, + enum rte_crypto_op_type type, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + struct rte_crypto_op_pool_private *priv; + + priv = (struct rte_crypto_op_pool_private *) rte_mempool_get_priv(mempool); + if (unlikely(priv->type != type && + priv->type != RTE_CRYPTO_OP_TYPE_UNDEFINED)) + return -EINVAL; + + if (rte_mempool_get_bulk(mempool, (void **)ops, nb_ops) == 0) + return nb_ops; + + return 0; +} + +/** + * Allocate a crypto operation from a mempool with default parameters set + * + * @param mempool crypto operation mempool + * @param type operation type to allocate + * + * @returns + * - On success returns a valid rte_crypto_op structure + * - On failure returns NULL + */ +static inline struct rte_crypto_op * +rte_crypto_op_alloc(struct rte_mempool *mempool, enum rte_crypto_op_type type) +{ + struct rte_crypto_op *op = NULL; + int retval; + + retval = __rte_crypto_op_raw_bulk_alloc(mempool, type, &op, 1); + if (unlikely(retval != 1)) + return NULL; + + __rte_crypto_op_reset(op, type); + + return op; +} + + +/** + * Bulk allocate crypto operations from a mempool with default parameters set + * + * @param mempool crypto operation mempool + * @param type operation type to allocate + * @param ops Array to place allocated crypto operations + * @param nb_ops Number of crypto operations to allocate + * + * @returns + * - On success returns a valid rte_crypto_op structure + * - On failure returns NULL + */ + +static inline unsigned +rte_crypto_op_bulk_alloc(struct rte_mempool *mempool, + enum rte_crypto_op_type type, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + int i; + + if (unlikely(__rte_crypto_op_raw_bulk_alloc(mempool, type, ops, nb_ops) + != nb_ops)) + return 0; + + for (i = 0; i < nb_ops; i++) + __rte_crypto_op_reset(ops[i], type); + + return nb_ops; +} + + + +/** + * Returns a pointer to the private data of a crypto operation if + * that operation has enough capacity for requested size. + * + * @param op crypto operation. + * @param size size of space requested in private data. + * + * @returns + * - if sufficient space available returns pointer to start of private data + * - if insufficient space returns NULL + */ +static inline void * +__rte_crypto_op_get_priv_data(struct rte_crypto_op *op, uint32_t size) +{ + uint32_t priv_size; + + if (likely(op->mempool != NULL)) { + priv_size = __rte_crypto_op_get_priv_data_size(op->mempool); + + if (likely(priv_size >= size)) + return (void *)((uint8_t *)(op + 1) + + sizeof(struct rte_crypto_sym_op)); + } + + return NULL; +} + +/** + * free crypto operation structure + * If operation has been allocate from a rte_mempool, then the operation will + * be returned to the mempool. + * + * @param op symmetric crypto operation + */ +static inline void +rte_crypto_op_free(struct rte_crypto_op *op) +{ + if (op != NULL && op->mempool != NULL) + rte_mempool_put(op->mempool, op); +} + +/** + * Allocate a symmetric crypto operation in the private data of an mbuf. + * + * @param m mbuf which is associated with the crypto operation, the + * operation will be allocated in the private data of that + * mbuf. + * + * @returns + * - On success returns a pointer to the crypto operation. + * - On failure returns NULL. + */ +static inline struct rte_crypto_op * +rte_crypto_sym_op_alloc_from_mbuf_priv_data(struct rte_mbuf *m) +{ + if (unlikely(m == NULL)) + return NULL; + + /* + * check that the mbuf's private data size is sufficient to contain a + * crypto operation + */ + if (unlikely(m->priv_size < (sizeof(struct rte_crypto_op) + + sizeof(struct rte_crypto_sym_op)))) + return NULL; + + /* private data starts immediately after the mbuf header in the mbuf. */ + struct rte_crypto_op *op = (struct rte_crypto_op *)(m + 1); + + __rte_crypto_op_reset(op, RTE_CRYPTO_OP_TYPE_SYMMETRIC); + + op->mempool = NULL; + op->sym->m_src = m; + + return op; +} + +/** + * Allocate space for symmetric crypto xforms in the private data space of the + * crypto operation. This also defaults the crypto xform type and configures + * the chaining of the xforms in the crypto operation + * + * @return + * - On success returns pointer to first crypto xform in crypto operations chain + * - On failure returns NULL + */ +static inline struct rte_crypto_sym_xform * +rte_crypto_op_sym_xforms_alloc(struct rte_crypto_op *op, uint8_t nb_xforms) +{ + void *priv_data; + uint32_t size; + + if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) + return NULL; + + size = sizeof(struct rte_crypto_sym_xform) * nb_xforms; + + priv_data = __rte_crypto_op_get_priv_data(op, size); + if (priv_data == NULL) + return NULL; + + return __rte_crypto_sym_op_sym_xforms_alloc(op->sym, priv_data, + nb_xforms); +} + + +/** + * Attach a session to a crypto operation + * + * @param op crypto operation, must be of type symmetric + * @param sess cryptodev session + */ +static inline int +rte_crypto_op_attach_sym_session(struct rte_crypto_op *op, + struct rte_cryptodev_sym_session *sess) +{ + if (unlikely(op->type != RTE_CRYPTO_OP_TYPE_SYMMETRIC)) + return -1; + + return __rte_crypto_sym_op_attach_sym_session(op->sym, sess); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRYPTO_H_ */ diff --git a/lib/librte_cryptodev/rte_crypto_sym.h b/lib/librte_cryptodev/rte_crypto_sym.h new file mode 100644 index 00000000..4ae9b9e8 --- /dev/null +++ b/lib/librte_cryptodev/rte_crypto_sym.h @@ -0,0 +1,662 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTO_SYM_H_ +#define _RTE_CRYPTO_SYM_H_ + +/** + * @file rte_crypto_sym.h + * + * RTE Definitions for Symmetric Cryptography + * + * Defines symmetric cipher and authentication algorithms and modes, as well + * as supported symmetric crypto operation combinations. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include +#include +#include + + +/** Symmetric Cipher Algorithms */ +enum rte_crypto_cipher_algorithm { + RTE_CRYPTO_CIPHER_NULL = 1, + /**< NULL cipher algorithm. No mode applies to the NULL algorithm. */ + + RTE_CRYPTO_CIPHER_3DES_CBC, + /**< Triple DES algorithm in CBC mode */ + RTE_CRYPTO_CIPHER_3DES_CTR, + /**< Triple DES algorithm in CTR mode */ + RTE_CRYPTO_CIPHER_3DES_ECB, + /**< Triple DES algorithm in ECB mode */ + + RTE_CRYPTO_CIPHER_AES_CBC, + /**< AES algorithm in CBC mode */ + RTE_CRYPTO_CIPHER_AES_CCM, + /**< AES algorithm in CCM mode. When this cipher algorithm is used the + * *RTE_CRYPTO_AUTH_AES_CCM* element of the + * *rte_crypto_hash_algorithm* enum MUST be used to set up the related + * *rte_crypto_auth_xform* structure in the session context or in + * the op_params of the crypto operation structure in the case of a + * session-less crypto operation + */ + RTE_CRYPTO_CIPHER_AES_CTR, + /**< AES algorithm in Counter mode */ + RTE_CRYPTO_CIPHER_AES_ECB, + /**< AES algorithm in ECB mode */ + RTE_CRYPTO_CIPHER_AES_F8, + /**< AES algorithm in F8 mode */ + RTE_CRYPTO_CIPHER_AES_GCM, + /**< AES algorithm in GCM mode. When this cipher algorithm is used the + * *RTE_CRYPTO_AUTH_AES_GCM* element of the + * *rte_crypto_auth_algorithm* enum MUST be used to set up the related + * *rte_crypto_auth_setup_data* structure in the session context or in + * the op_params of the crypto operation structure in the case of a + * session-less crypto operation. + */ + RTE_CRYPTO_CIPHER_AES_XTS, + /**< AES algorithm in XTS mode */ + + RTE_CRYPTO_CIPHER_ARC4, + /**< (A)RC4 cipher algorithm */ + + RTE_CRYPTO_CIPHER_KASUMI_F8, + /**< Kasumi algorithm in F8 mode */ + + RTE_CRYPTO_CIPHER_SNOW3G_UEA2, + /**< SNOW3G algorithm in UEA2 mode */ + + RTE_CRYPTO_CIPHER_ZUC_EEA3, + /**< ZUC algorithm in EEA3 mode */ + + RTE_CRYPTO_CIPHER_LIST_END +}; + +/** Symmetric Cipher Direction */ +enum rte_crypto_cipher_operation { + RTE_CRYPTO_CIPHER_OP_ENCRYPT, + /**< Encrypt cipher operation */ + RTE_CRYPTO_CIPHER_OP_DECRYPT + /**< Decrypt cipher operation */ +}; + +/** + * Symmetric Cipher Setup Data. + * + * This structure contains data relating to Cipher (Encryption and Decryption) + * use to create a session. + */ +struct rte_crypto_cipher_xform { + enum rte_crypto_cipher_operation op; + /**< This parameter determines if the cipher operation is an encrypt or + * a decrypt operation. For the RC4 algorithm and the F8/CTR modes, + * only encrypt operations are valid. + */ + enum rte_crypto_cipher_algorithm algo; + /**< Cipher algorithm */ + + struct { + uint8_t *data; /**< pointer to key data */ + size_t length; /**< key length in bytes */ + } key; + /**< Cipher key + * + * For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.data will + * point to a concatenation of the AES encryption key followed by a + * keymask. As per RFC3711, the keymask should be padded with trailing + * bytes to match the length of the encryption key used. + * + * For AES-XTS mode of operation, two keys must be provided and + * key.data must point to the two keys concatenated together (Key1 || + * Key2). The cipher key length will contain the total size of both + * keys. + * + * Cipher key length is in bytes. For AES it can be 128 bits (16 bytes), + * 192 bits (24 bytes) or 256 bits (32 bytes). + * + * For the CCM mode of operation, the only supported key length is 128 + * bits (16 bytes). + * + * For the RTE_CRYPTO_CIPHER_AES_F8 mode of operation, key.length + * should be set to the combined length of the encryption key and the + * keymask. Since the keymask and the encryption key are the same size, + * key.length should be set to 2 x the AES encryption key length. + * + * For the AES-XTS mode of operation: + * - Two keys must be provided and key.length refers to total length of + * the two keys. + * - Each key can be either 128 bits (16 bytes) or 256 bits (32 bytes). + * - Both keys must have the same size. + **/ +}; + +/** Symmetric Authentication / Hash Algorithms */ +enum rte_crypto_auth_algorithm { + RTE_CRYPTO_AUTH_NULL = 1, + /**< NULL hash algorithm. */ + + RTE_CRYPTO_AUTH_AES_CBC_MAC, + /**< AES-CBC-MAC algorithm. Only 128-bit keys are supported. */ + RTE_CRYPTO_AUTH_AES_CCM, + /**< AES algorithm in CCM mode. This is an authenticated cipher. When + * this hash algorithm is used, the *RTE_CRYPTO_CIPHER_AES_CCM* + * element of the *rte_crypto_cipher_algorithm* enum MUST be used to + * set up the related rte_crypto_cipher_setup_data structure in the + * session context or the corresponding parameter in the crypto + * operation data structures op_params parameter MUST be set for a + * session-less crypto operation. + */ + RTE_CRYPTO_AUTH_AES_CMAC, + /**< AES CMAC algorithm. */ + RTE_CRYPTO_AUTH_AES_GCM, + /**< AES algorithm in GCM mode. When this hash algorithm + * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the + * rte_crypto_cipher_algorithm enum MUST be used to set up the related + * rte_crypto_cipher_setup_data structure in the session context, or + * the corresponding parameter in the crypto operation data structures + * op_params parameter MUST be set for a session-less crypto operation. + */ + RTE_CRYPTO_AUTH_AES_GMAC, + /**< AES GMAC algorithm. When this hash algorithm + * is used, the RTE_CRYPTO_CIPHER_AES_GCM element of the + * rte_crypto_cipher_algorithm enum MUST be used to set up the related + * rte_crypto_cipher_setup_data structure in the session context, or + * the corresponding parameter in the crypto operation data structures + * op_params parameter MUST be set for a session-less crypto operation. + */ + RTE_CRYPTO_AUTH_AES_XCBC_MAC, + /**< AES XCBC algorithm. */ + + RTE_CRYPTO_AUTH_KASUMI_F9, + /**< Kasumi algorithm in F9 mode. */ + + RTE_CRYPTO_AUTH_MD5, + /**< MD5 algorithm */ + RTE_CRYPTO_AUTH_MD5_HMAC, + /**< HMAC using MD5 algorithm */ + + RTE_CRYPTO_AUTH_SHA1, + /**< 128 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA1_HMAC, + /**< HMAC using 128 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA224, + /**< 224 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA224_HMAC, + /**< HMAC using 224 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA256, + /**< 256 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA256_HMAC, + /**< HMAC using 256 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA384, + /**< 384 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA384_HMAC, + /**< HMAC using 384 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA512, + /**< 512 bit SHA algorithm. */ + RTE_CRYPTO_AUTH_SHA512_HMAC, + /**< HMAC using 512 bit SHA algorithm. */ + + RTE_CRYPTO_AUTH_SNOW3G_UIA2, + /**< SNOW3G algorithm in UIA2 mode. */ + + RTE_CRYPTO_AUTH_ZUC_EIA3, + /**< ZUC algorithm in EIA3 mode */ + + RTE_CRYPTO_AUTH_LIST_END +}; + +/** Symmetric Authentication / Hash Operations */ +enum rte_crypto_auth_operation { + RTE_CRYPTO_AUTH_OP_VERIFY, /**< Verify authentication digest */ + RTE_CRYPTO_AUTH_OP_GENERATE /**< Generate authentication digest */ +}; + +/** + * Authentication / Hash transform data. + * + * This structure contains data relating to an authentication/hash crypto + * transforms. The fields op, algo and digest_length are common to all + * authentication transforms and MUST be set. + */ +struct rte_crypto_auth_xform { + enum rte_crypto_auth_operation op; + /**< Authentication operation type */ + enum rte_crypto_auth_algorithm algo; + /**< Authentication algorithm selection */ + + struct { + uint8_t *data; /**< pointer to key data */ + size_t length; /**< key length in bytes */ + } key; + /**< Authentication key data. + * The authentication key length MUST be less than or equal to the + * block size of the algorithm. It is the callers responsibility to + * ensure that the key length is compliant with the standard being used + * (for example RFC 2104, FIPS 198a). + */ + + uint32_t digest_length; + /**< Length of the digest to be returned. If the verify option is set, + * this specifies the length of the digest to be compared for the + * session. + * + * If the value is less than the maximum length allowed by the hash, + * the result shall be truncated. If the value is greater than the + * maximum length allowed by the hash then an error will be generated + * by *rte_cryptodev_sym_session_create* or by the + * *rte_cryptodev_sym_enqueue_burst* if using session-less APIs. + */ + + uint32_t add_auth_data_length; + /**< The length of the additional authenticated data (AAD) in bytes. + * The maximum permitted value is 240 bytes, unless otherwise specified + * below. + * + * This field must be specified when the hash algorithm is one of the + * following: + * + * - For SNOW3G (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2), this is the + * length of the IV (which should be 16). + * + * - For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM). In this case, this is + * the length of the Additional Authenticated Data (called A, in NIST + * SP800-38D). + * + * - For CCM (@ref RTE_CRYPTO_AUTH_AES_CCM). In this case, this is + * the length of the associated data (called A, in NIST SP800-38C). + * Note that this does NOT include the length of any padding, or the + * 18 bytes reserved at the start of the above field to store the + * block B0 and the encoded length. The maximum permitted value in + * this case is 222 bytes. + * + * @note + * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of operation + * this field is not used and should be set to 0. Instead the length + * of the AAD data is specified in the message length to hash field of + * the rte_crypto_sym_op_data structure. + */ +}; + +/** Crypto transformation types */ +enum rte_crypto_sym_xform_type { + RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED = 0, /**< No xform specified */ + RTE_CRYPTO_SYM_XFORM_AUTH, /**< Authentication xform */ + RTE_CRYPTO_SYM_XFORM_CIPHER /**< Cipher xform */ +}; + +/** + * Symmetric crypto transform structure. + * + * This is used to specify the crypto transforms required, multiple transforms + * can be chained together to specify a chain transforms such as authentication + * then cipher, or cipher then authentication. Each transform structure can + * hold a single transform, the type field is used to specify which transform + * is contained within the union + */ +struct rte_crypto_sym_xform { + struct rte_crypto_sym_xform *next; + /**< next xform in chain */ + enum rte_crypto_sym_xform_type type + ; /**< xform type */ + union { + struct rte_crypto_auth_xform auth; + /**< Authentication / hash xform */ + struct rte_crypto_cipher_xform cipher; + /**< Cipher xform */ + }; +}; + +/** + * Crypto operation session type. This is used to specify whether a crypto + * operation has session structure attached for immutable parameters or if all + * operation information is included in the operation data structure. + */ +enum rte_crypto_sym_op_sess_type { + RTE_CRYPTO_SYM_OP_WITH_SESSION, /**< Session based crypto operation */ + RTE_CRYPTO_SYM_OP_SESSIONLESS /**< Session-less crypto operation */ +}; + + +struct rte_cryptodev_sym_session; + +/** + * Symmetric Cryptographic Operation. + * + * This structure contains data relating to performing symmetric cryptographic + * processing on a referenced mbuf data buffer. + * + * When a symmetric crypto operation is enqueued with the device for processing + * it must have a valid *rte_mbuf* structure attached, via m_src parameter, + * which contains the source data which the crypto operation is to be performed + * on. + */ +struct rte_crypto_sym_op { + struct rte_mbuf *m_src; /**< source mbuf */ + struct rte_mbuf *m_dst; /**< destination mbuf */ + + enum rte_crypto_sym_op_sess_type sess_type; + + union { + struct rte_cryptodev_sym_session *session; + /**< Handle for the initialised session context */ + struct rte_crypto_sym_xform *xform; + /**< Session-less API crypto operation parameters */ + }; + + struct { + struct { + uint32_t offset; + /**< Starting point for cipher processing, specified + * as number of bytes from start of data in the source + * buffer. The result of the cipher operation will be + * written back into the output buffer starting at + * this location. + * + * @note + * For Snow3G @ RTE_CRYPTO_CIPHER_SNOW3G_UEA2, + * this field should be in bits. + */ + + uint32_t length; + /**< The message length, in bytes, of the source buffer + * on which the cryptographic operation will be + * computed. This must be a multiple of the block size + * if a block cipher is being used. This is also the + * same as the result length. + * + * @note + * In the case of CCM @ref RTE_CRYPTO_AUTH_AES_CCM, + * this value should not include the length of the + * padding or the length of the MAC; the driver will + * compute the actual number of bytes over which the + * encryption will occur, which will include these + * values. + * + * @note + * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC, this + * field should be set to 0. + * + * @note + * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UEA2 + * this field should be in bits. + */ + } data; /**< Data offsets and length for ciphering */ + + struct { + uint8_t *data; + /**< Initialisation Vector or Counter. + * + * - For block ciphers in CBC or F8 mode, or for Kasumi + * in F8 mode, or for SNOW3G in UEA2 mode, this is the + * Initialisation Vector (IV) value. + * + * - For block ciphers in CTR mode, this is the counter. + * + * - For GCM mode, this is either the IV (if the length + * is 96 bits) or J0 (for other sizes), where J0 is as + * defined by NIST SP800-38D. Regardless of the IV + * length, a full 16 bytes needs to be allocated. + * + * - For CCM mode, the first byte is reserved, and the + * nonce should be written starting at &iv[1] (to allow + * space for the implementation to write in the flags + * in the first byte). Note that a full 16 bytes should + * be allocated, even though the length field will + * have a value less than this. + * + * - For AES-XTS, this is the 128bit tweak, i, from + * IEEE Std 1619-2007. + * + * For optimum performance, the data pointed to SHOULD + * be 8-byte aligned. + */ + phys_addr_t phys_addr; + uint16_t length; + /**< Length of valid IV data. + * + * - For block ciphers in CBC or F8 mode, or for Kasumi + * in F8 mode, or for SNOW3G in UEA2 mode, this is the + * length of the IV (which must be the same as the + * block length of the cipher). + * + * - For block ciphers in CTR mode, this is the length + * of the counter (which must be the same as the block + * length of the cipher). + * + * - For GCM mode, this is either 12 (for 96-bit IVs) + * or 16, in which case data points to J0. + * + * - For CCM mode, this is the length of the nonce, + * which can be in the range 7 to 13 inclusive. + */ + } iv; /**< Initialisation vector parameters */ + } cipher; + + struct { + struct { + uint32_t offset; + /**< Starting point for hash processing, specified as + * number of bytes from start of packet in source + * buffer. + * + * @note + * For CCM and GCM modes of operation, this field is + * ignored. The field @ref aad field + * should be set instead. + * + * @note For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) + * mode of operation, this field specifies the start + * of the AAD data in the source buffer. + * + * @note + * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2 + * this field should be in bits. + */ + + uint32_t length; + /**< The message length, in bytes, of the source + * buffer that the hash will be computed on. + * + * @note + * For CCM and GCM modes of operation, this field is + * ignored. The field @ref aad field should be set + * instead. + * + * @note + * For AES-GMAC @ref RTE_CRYPTO_AUTH_AES_GMAC mode + * of operation, this field specifies the length of + * the AAD data in the source buffer. + * + * @note + * For Snow3G @ RTE_CRYPTO_AUTH_SNOW3G_UIA2 + * this field should be in bits. + */ + } data; /**< Data offsets and length for authentication */ + + struct { + uint8_t *data; + /**< If this member of this structure is set this is a + * pointer to the location where the digest result + * should be inserted (in the case of digest generation) + * or where the purported digest exists (in the case of + * digest verification). + * + * At session creation time, the client specified the + * digest result length with the digest_length member + * of the @ref rte_crypto_auth_xform structure. For + * physical crypto devices the caller must allocate at + * least digest_length of physically contiguous memory + * at this location. + * + * For digest generation, the digest result will + * overwrite any data at this location. + * + * @note + * For GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), for + * "digest result" read "authentication tag T". + * + * If this member is not set the digest result is + * understood to be in the destination buffer for + * digest generation, and in the source buffer for + * digest verification. The location of the digest + * result in this case is immediately following the + * region over which the digest is computed. + */ + phys_addr_t phys_addr; + /**< Physical address of digest */ + uint16_t length; + /**< Length of digest */ + } digest; /**< Digest parameters */ + + struct { + uint8_t *data; + /**< Pointer to Additional Authenticated Data (AAD) + * needed for authenticated cipher mechanisms (CCM and + * GCM), and to the IV for SNOW3G authentication + * (@ref RTE_CRYPTO_AUTH_SNOW3G_UIA2). For other + * authentication mechanisms this pointer is ignored. + * + * The length of the data pointed to by this field is + * set up for the session in the @ref + * rte_crypto_auth_xform structure as part of the @ref + * rte_cryptodev_sym_session_create function call. + * This length must not exceed 240 bytes. + * + * Specifically for CCM (@ref RTE_CRYPTO_AUTH_AES_CCM), + * the caller should setup this field as follows: + * + * - the nonce should be written starting at an offset + * of one byte into the array, leaving room for the + * implementation to write in the flags to the first + * byte. + * + * - the additional authentication data itself should + * be written starting at an offset of 18 bytes into + * the array, leaving room for the length encoding in + * the first two bytes of the second block. + * + * - the array should be big enough to hold the above + * fields, plus any padding to round this up to the + * nearest multiple of the block size (16 bytes). + * Padding will be added by the implementation. + * + * Finally, for GCM (@ref RTE_CRYPTO_AUTH_AES_GCM), the + * caller should setup this field as follows: + * + * - the AAD is written in starting at byte 0 + * - the array must be big enough to hold the AAD, plus + * any space to round this up to the nearest multiple + * of the block size (16 bytes). + * + * @note + * For AES-GMAC (@ref RTE_CRYPTO_AUTH_AES_GMAC) mode of + * operation, this field is not used and should be set + * to 0. Instead the AAD data should be placed in the + * source buffer. + */ + phys_addr_t phys_addr; /**< physical address */ + uint16_t length; /**< Length of digest */ + } aad; + /**< Additional authentication parameters */ + } auth; +} __rte_cache_aligned; + + +/** + * Reset the fields of a symmetric operation to their default values. + * + * @param op The crypto operation to be reset. + */ +static inline void +__rte_crypto_sym_op_reset(struct rte_crypto_sym_op *op) +{ + memset(op, 0, sizeof(*op)); + + op->sess_type = RTE_CRYPTO_SYM_OP_SESSIONLESS; +} + + +/** + * Allocate space for symmetric crypto xforms in the private data space of the + * crypto operation. This also defaults the crypto xform type to + * RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED and configures the chaining of the xforms + * in the crypto operation + * + * @return + * - On success returns pointer to first crypto xform in crypto operations chain + * - On failure returns NULL + */ +static inline struct rte_crypto_sym_xform * +__rte_crypto_sym_op_sym_xforms_alloc(struct rte_crypto_sym_op *sym_op, + void *priv_data, uint8_t nb_xforms) +{ + struct rte_crypto_sym_xform *xform; + + sym_op->xform = xform = (struct rte_crypto_sym_xform *)priv_data; + + do { + xform->type = RTE_CRYPTO_SYM_XFORM_NOT_SPECIFIED; + xform = xform->next = --nb_xforms > 0 ? xform + 1 : NULL; + } while (xform); + + return sym_op->xform; +} + + +/** + * Attach a session to a symmetric crypto operation + * + * @param sym_op crypto operation + * @param sess cryptodev session + */ +static inline int +__rte_crypto_sym_op_attach_sym_session(struct rte_crypto_sym_op *sym_op, + struct rte_cryptodev_sym_session *sess) +{ + sym_op->session = sess; + sym_op->sess_type = RTE_CRYPTO_SYM_OP_WITH_SESSION; + + return 0; +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRYPTO_SYM_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev.c b/lib/librte_cryptodev/rte_cryptodev.c new file mode 100644 index 00000000..aa4ea425 --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev.c @@ -0,0 +1,1162 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_crypto.h" +#include "rte_cryptodev.h" +#include "rte_cryptodev_pmd.h" + +struct rte_cryptodev rte_crypto_devices[RTE_CRYPTO_MAX_DEVS]; + +struct rte_cryptodev *rte_cryptodevs = &rte_crypto_devices[0]; + +static struct rte_cryptodev_global cryptodev_globals = { + .devs = &rte_crypto_devices[0], + .data = { NULL }, + .nb_devs = 0, + .max_devs = RTE_CRYPTO_MAX_DEVS +}; + +struct rte_cryptodev_global *rte_cryptodev_globals = &cryptodev_globals; + +/* spinlock for crypto device callbacks */ +static rte_spinlock_t rte_cryptodev_cb_lock = RTE_SPINLOCK_INITIALIZER; + + +/** + * The user application callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the event type. + */ +struct rte_cryptodev_callback { + TAILQ_ENTRY(rte_cryptodev_callback) next; /**< Callbacks list */ + rte_cryptodev_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Parameter for callback */ + enum rte_cryptodev_event_type event; /**< Interrupt event type */ + uint32_t active; /**< Callback is executing */ +}; + + +const char * +rte_cryptodev_get_feature_name(uint64_t flag) +{ + switch (flag) { + case RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO: + return "SYMMETRIC_CRYPTO"; + case RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO: + return "ASYMMETRIC_CRYPTO"; + case RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING: + return "SYM_OPERATION_CHAINING"; + case RTE_CRYPTODEV_FF_CPU_SSE: + return "CPU_SSE"; + case RTE_CRYPTODEV_FF_CPU_AVX: + return "CPU_AVX"; + case RTE_CRYPTODEV_FF_CPU_AVX2: + return "CPU_AVX2"; + case RTE_CRYPTODEV_FF_CPU_AESNI: + return "CPU_AESNI"; + case RTE_CRYPTODEV_FF_HW_ACCELERATED: + return "HW_ACCELERATED"; + + default: + return NULL; + } +} + + +int +rte_cryptodev_create_vdev(const char *name, const char *args) +{ + return rte_eal_vdev_init(name, args); +} + +int +rte_cryptodev_get_dev_id(const char *name) { + unsigned i; + + if (name == NULL) + return -1; + + for (i = 0; i < rte_cryptodev_globals->max_devs; i++) + if ((strcmp(rte_cryptodev_globals->devs[i].data->name, name) + == 0) && + (rte_cryptodev_globals->devs[i].attached == + RTE_CRYPTODEV_ATTACHED)) + return i; + + return -1; +} + +uint8_t +rte_cryptodev_count(void) +{ + return rte_cryptodev_globals->nb_devs; +} + +uint8_t +rte_cryptodev_count_devtype(enum rte_cryptodev_type type) +{ + uint8_t i, dev_count = 0; + + for (i = 0; i < rte_cryptodev_globals->max_devs; i++) + if (rte_cryptodev_globals->devs[i].dev_type == type && + rte_cryptodev_globals->devs[i].attached == + RTE_CRYPTODEV_ATTACHED) + dev_count++; + + return dev_count; +} + +int +rte_cryptodev_socket_id(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) + return -1; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + + return dev->data->socket_id; +} + +static inline int +rte_cryptodev_data_alloc(uint8_t dev_id, struct rte_cryptodev_data **data, + int socket_id) +{ + char mz_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + const struct rte_memzone *mz; + int n; + + /* generate memzone name */ + n = snprintf(mz_name, sizeof(mz_name), "rte_cryptodev_data_%u", dev_id); + if (n >= (int)sizeof(mz_name)) + return -EINVAL; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + mz = rte_memzone_reserve(mz_name, + sizeof(struct rte_cryptodev_data), + socket_id, 0); + } else + mz = rte_memzone_lookup(mz_name); + + if (mz == NULL) + return -ENOMEM; + + *data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + memset(*data, 0, sizeof(struct rte_cryptodev_data)); + + return 0; +} + +static uint8_t +rte_cryptodev_find_free_device_index(void) +{ + uint8_t dev_id; + + for (dev_id = 0; dev_id < RTE_CRYPTO_MAX_DEVS; dev_id++) { + if (rte_crypto_devices[dev_id].attached == + RTE_CRYPTODEV_DETACHED) + return dev_id; + } + return RTE_CRYPTO_MAX_DEVS; +} + +struct rte_cryptodev * +rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id) +{ + struct rte_cryptodev *cryptodev; + uint8_t dev_id; + + if (rte_cryptodev_pmd_get_named_dev(name) != NULL) { + CDEV_LOG_ERR("Crypto device with name %s already " + "allocated!", name); + return NULL; + } + + dev_id = rte_cryptodev_find_free_device_index(); + if (dev_id == RTE_CRYPTO_MAX_DEVS) { + CDEV_LOG_ERR("Reached maximum number of crypto devices"); + return NULL; + } + + cryptodev = rte_cryptodev_pmd_get_dev(dev_id); + + if (cryptodev->data == NULL) { + struct rte_cryptodev_data *cryptodev_data = + cryptodev_globals.data[dev_id]; + + int retval = rte_cryptodev_data_alloc(dev_id, &cryptodev_data, + socket_id); + + if (retval < 0 || cryptodev_data == NULL) + return NULL; + + cryptodev->data = cryptodev_data; + + snprintf(cryptodev->data->name, RTE_CRYPTODEV_NAME_MAX_LEN, + "%s", name); + + cryptodev->data->dev_id = dev_id; + cryptodev->data->socket_id = socket_id; + cryptodev->data->dev_started = 0; + + cryptodev->attached = RTE_CRYPTODEV_ATTACHED; + cryptodev->pmd_type = type; + + cryptodev_globals.nb_devs++; + } + + return cryptodev; +} + +static inline int +rte_cryptodev_create_unique_device_name(char *name, size_t size, + struct rte_pci_device *pci_dev) +{ + int ret; + + if ((name == NULL) || (pci_dev == NULL)) + return -EINVAL; + + ret = snprintf(name, size, "%d:%d.%d", + pci_dev->addr.bus, pci_dev->addr.devid, + pci_dev->addr.function); + if (ret < 0) + return ret; + return 0; +} + +int +rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev) +{ + int ret; + + if (cryptodev == NULL) + return -EINVAL; + + ret = rte_cryptodev_close(cryptodev->data->dev_id); + if (ret < 0) + return ret; + + cryptodev->attached = RTE_CRYPTODEV_DETACHED; + cryptodev_globals.nb_devs--; + return 0; +} + +struct rte_cryptodev * +rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, + int socket_id) +{ + struct rte_cryptodev *cryptodev; + + /* allocate device structure */ + cryptodev = rte_cryptodev_pmd_allocate(name, PMD_VDEV, socket_id); + if (cryptodev == NULL) + return NULL; + + /* allocate private device structure */ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + cryptodev->data->dev_private = + rte_zmalloc_socket("cryptodev device private", + dev_private_size, + RTE_CACHE_LINE_SIZE, + socket_id); + + if (cryptodev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private device" + " data"); + } + + /* initialise user call-back tail queue */ + TAILQ_INIT(&(cryptodev->link_intr_cbs)); + + return cryptodev; +} + +static int +rte_cryptodev_init(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct rte_cryptodev_driver *cryptodrv; + struct rte_cryptodev *cryptodev; + + char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + + int retval; + + cryptodrv = (struct rte_cryptodev_driver *)pci_drv; + if (cryptodrv == NULL) + return -ENODEV; + + /* Create unique Crypto device name using PCI address */ + rte_cryptodev_create_unique_device_name(cryptodev_name, + sizeof(cryptodev_name), pci_dev); + + cryptodev = rte_cryptodev_pmd_allocate(cryptodev_name, PMD_PDEV, + rte_socket_id()); + if (cryptodev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + cryptodev->data->dev_private = + rte_zmalloc_socket( + "cryptodev private structure", + cryptodrv->dev_private_size, + RTE_CACHE_LINE_SIZE, + rte_socket_id()); + + if (cryptodev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private " + "device data"); + } + + cryptodev->pci_dev = pci_dev; + cryptodev->driver = cryptodrv; + + /* init user callbacks */ + TAILQ_INIT(&(cryptodev->link_intr_cbs)); + + /* Invoke PMD device initialization function */ + retval = (*cryptodrv->cryptodev_init)(cryptodrv, cryptodev); + if (retval == 0) + return 0; + + CDEV_LOG_ERR("driver %s: crypto_dev_init(vendor_id=0x%x device_id=0x%x)" + " failed", pci_drv->name, + (unsigned) pci_dev->id.vendor_id, + (unsigned) pci_dev->id.device_id); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(cryptodev->data->dev_private); + + cryptodev->attached = RTE_CRYPTODEV_DETACHED; + cryptodev_globals.nb_devs--; + + return -ENXIO; +} + +static int +rte_cryptodev_uninit(struct rte_pci_device *pci_dev) +{ + const struct rte_cryptodev_driver *cryptodrv; + struct rte_cryptodev *cryptodev; + char cryptodev_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + int ret; + + if (pci_dev == NULL) + return -EINVAL; + + /* Create unique device name using PCI address */ + rte_cryptodev_create_unique_device_name(cryptodev_name, + sizeof(cryptodev_name), pci_dev); + + cryptodev = rte_cryptodev_pmd_get_named_dev(cryptodev_name); + if (cryptodev == NULL) + return -ENODEV; + + cryptodrv = (const struct rte_cryptodev_driver *)pci_dev->driver; + if (cryptodrv == NULL) + return -ENODEV; + + /* Invoke PMD device uninit function */ + if (*cryptodrv->cryptodev_uninit) { + ret = (*cryptodrv->cryptodev_uninit)(cryptodrv, cryptodev); + if (ret) + return ret; + } + + /* free crypto device */ + rte_cryptodev_pmd_release_device(cryptodev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(cryptodev->data->dev_private); + + cryptodev->pci_dev = NULL; + cryptodev->driver = NULL; + cryptodev->data = NULL; + + return 0; +} + +int +rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *cryptodrv, + enum pmd_type type) +{ + /* Call crypto device initialization directly if device is virtual */ + if (type == PMD_VDEV) + return rte_cryptodev_init((struct rte_pci_driver *)cryptodrv, + NULL); + + /* + * Register PCI driver for physical device intialisation during + * PCI probing + */ + cryptodrv->pci_drv.devinit = rte_cryptodev_init; + cryptodrv->pci_drv.devuninit = rte_cryptodev_uninit; + + rte_eal_pci_register(&cryptodrv->pci_drv); + + return 0; +} + + +uint16_t +rte_cryptodev_queue_pair_count(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + + dev = &rte_crypto_devices[dev_id]; + return dev->data->nb_queue_pairs; +} + +static int +rte_cryptodev_queue_pairs_config(struct rte_cryptodev *dev, uint16_t nb_qpairs, + int socket_id) +{ + struct rte_cryptodev_info dev_info; + void **qp; + unsigned i; + + if ((dev == NULL) || (nb_qpairs < 1)) { + CDEV_LOG_ERR("invalid param: dev %p, nb_queues %u", + dev, nb_qpairs); + return -EINVAL; + } + + CDEV_LOG_DEBUG("Setup %d queues pairs on device %u", + nb_qpairs, dev->data->dev_id); + + memset(&dev_info, 0, sizeof(struct rte_cryptodev_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + (*dev->dev_ops->dev_infos_get)(dev, &dev_info); + + if (nb_qpairs > (dev_info.max_nb_queue_pairs)) { + CDEV_LOG_ERR("Invalid num queue_pairs (%u) for dev %u", + nb_qpairs, dev->data->dev_id); + return -EINVAL; + } + + if (dev->data->queue_pairs == NULL) { /* first time configuration */ + dev->data->queue_pairs = rte_zmalloc_socket( + "cryptodev->queue_pairs", + sizeof(dev->data->queue_pairs[0]) * nb_qpairs, + RTE_CACHE_LINE_SIZE, socket_id); + + if (dev->data->queue_pairs == NULL) { + dev->data->nb_queue_pairs = 0; + CDEV_LOG_ERR("failed to get memory for qp meta data, " + "nb_queues %u", + nb_qpairs); + return -(ENOMEM); + } + } else { /* re-configure */ + int ret; + uint16_t old_nb_queues = dev->data->nb_queue_pairs; + + qp = dev->data->queue_pairs; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_release, + -ENOTSUP); + + for (i = nb_qpairs; i < old_nb_queues; i++) { + ret = (*dev->dev_ops->queue_pair_release)(dev, i); + if (ret < 0) + return ret; + } + + qp = rte_realloc(qp, sizeof(qp[0]) * nb_qpairs, + RTE_CACHE_LINE_SIZE); + if (qp == NULL) { + CDEV_LOG_ERR("failed to realloc qp meta data," + " nb_queues %u", nb_qpairs); + return -(ENOMEM); + } + + if (nb_qpairs > old_nb_queues) { + uint16_t new_qs = nb_qpairs - old_nb_queues; + + memset(qp + old_nb_queues, 0, + sizeof(qp[0]) * new_qs); + } + + dev->data->queue_pairs = qp; + + } + dev->data->nb_queue_pairs = nb_qpairs; + return 0; +} + +int +rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + if (queue_pair_id >= dev->data->nb_queue_pairs) { + CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_start, -ENOTSUP); + + return dev->dev_ops->queue_pair_start(dev, queue_pair_id); + +} + +int +rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + if (queue_pair_id >= dev->data->nb_queue_pairs) { + CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_stop, -ENOTSUP); + + return dev->dev_ops->queue_pair_stop(dev, queue_pair_id); + +} + +static int +rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev, + unsigned nb_objs, unsigned obj_cache_size, int socket_id); + +int +rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config) +{ + struct rte_cryptodev *dev; + int diag; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + + if (dev->data->dev_started) { + CDEV_LOG_ERR( + "device %d must be stopped to allow configuration", dev_id); + return -EBUSY; + } + + /* Setup new number of queue pairs and reconfigure device. */ + diag = rte_cryptodev_queue_pairs_config(dev, config->nb_queue_pairs, + config->socket_id); + if (diag != 0) { + CDEV_LOG_ERR("dev%d rte_crypto_dev_queue_pairs_config = %d", + dev_id, diag); + return diag; + } + + /* Setup Session mempool for device */ + return rte_cryptodev_sym_session_pool_create(dev, + config->session_mp.nb_objs, + config->session_mp.cache_size, + config->socket_id); +} + + +int +rte_cryptodev_start(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + int diag; + + CDEV_LOG_DEBUG("Start dev_id=%" PRIu8, dev_id); + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + CDEV_LOG_ERR("Device with dev_id=%" PRIu8 " already started", + dev_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return diag; + + return 0; +} + +void +rte_cryptodev_stop(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return; + } + + dev = &rte_crypto_devices[dev_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + CDEV_LOG_ERR("Device with dev_id=%" PRIu8 " already stopped", + dev_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_cryptodev_close(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + int retval; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -1; + } + + dev = &rte_crypto_devices[dev_id]; + + /* Device must be stopped before it can be closed */ + if (dev->data->dev_started == 1) { + CDEV_LOG_ERR("Device %u must be stopped before closing", + dev_id); + return -EBUSY; + } + + /* We can't close the device if there are outstanding sessions in use */ + if (dev->data->session_pool != NULL) { + if (!rte_mempool_full(dev->data->session_pool)) { + CDEV_LOG_ERR("dev_id=%u close failed, session mempool " + "has sessions still in use, free " + "all sessions before calling close", + (unsigned)dev_id); + return -EBUSY; + } + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_close, -ENOTSUP); + retval = (*dev->dev_ops->dev_close)(dev); + + if (retval < 0) + return retval; + + return 0; +} + +int +rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, + const struct rte_cryptodev_qp_conf *qp_conf, int socket_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + if (queue_pair_id >= dev->data->nb_queue_pairs) { + CDEV_LOG_ERR("Invalid queue_pair_id=%d", queue_pair_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + CDEV_LOG_ERR( + "device %d must be stopped to allow configuration", dev_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_pair_setup, -ENOTSUP); + + return (*dev->dev_ops->queue_pair_setup)(dev, queue_pair_id, qp_conf, + socket_id); +} + + +int +rte_cryptodev_stats_get(uint8_t dev_id, struct rte_cryptodev_stats *stats) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); + return -ENODEV; + } + + if (stats == NULL) { + CDEV_LOG_ERR("Invalid stats ptr"); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + (*dev->dev_ops->stats_get)(dev, stats); + return 0; +} + +void +rte_cryptodev_stats_reset(uint8_t dev_id) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return; + } + + dev = &rte_crypto_devices[dev_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset); + (*dev->dev_ops->stats_reset)(dev); +} + + +void +rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info) +{ + struct rte_cryptodev *dev; + + if (dev_id >= cryptodev_globals.nb_devs) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); + return; + } + + dev = &rte_crypto_devices[dev_id]; + + memset(dev_info, 0, sizeof(struct rte_cryptodev_info)); + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + + dev_info->pci_dev = dev->pci_dev; + if (dev->driver) + dev_info->driver_name = dev->driver->pci_drv.name; +} + + +int +rte_cryptodev_callback_register(uint8_t dev_id, + enum rte_cryptodev_event_type event, + rte_cryptodev_cb_fn cb_fn, void *cb_arg) +{ + struct rte_cryptodev *dev; + struct rte_cryptodev_callback *user_cb; + + if (!cb_fn) + return -EINVAL; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + rte_spinlock_lock(&rte_cryptodev_cb_lock); + + TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { + if (user_cb->cb_fn == cb_fn && + user_cb->cb_arg == cb_arg && + user_cb->event == event) { + break; + } + } + + /* create a new callback. */ + if (user_cb == NULL) { + user_cb = rte_zmalloc("INTR_USER_CALLBACK", + sizeof(struct rte_cryptodev_callback), 0); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + } + } + + rte_spinlock_unlock(&rte_cryptodev_cb_lock); + return (user_cb == NULL) ? -ENOMEM : 0; +} + +int +rte_cryptodev_callback_unregister(uint8_t dev_id, + enum rte_cryptodev_event_type event, + rte_cryptodev_cb_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_cryptodev *dev; + struct rte_cryptodev_callback *cb, *next; + + if (!cb_fn) + return -EINVAL; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%" PRIu8, dev_id); + return -EINVAL; + } + + dev = &rte_crypto_devices[dev_id]; + rte_spinlock_lock(&rte_cryptodev_cb_lock); + + ret = 0; + for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || + (cb->cb_arg != (void *)-1 && + cb->cb_arg != cb_arg)) + continue; + + /* + * if this callback is not executing right now, + * then remove it. + */ + if (cb->active == 0) { + TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); + rte_free(cb); + } else { + ret = -EAGAIN; + } + } + + rte_spinlock_unlock(&rte_cryptodev_cb_lock); + return ret; +} + +void +rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, + enum rte_cryptodev_event_type event) +{ + struct rte_cryptodev_callback *cb_lst; + struct rte_cryptodev_callback dev_cb; + + rte_spinlock_lock(&rte_cryptodev_cb_lock); + TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { + if (cb_lst->cb_fn == NULL || cb_lst->event != event) + continue; + dev_cb = *cb_lst; + cb_lst->active = 1; + rte_spinlock_unlock(&rte_cryptodev_cb_lock); + dev_cb.cb_fn(dev->data->dev_id, dev_cb.event, + dev_cb.cb_arg); + rte_spinlock_lock(&rte_cryptodev_cb_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&rte_cryptodev_cb_lock); +} + + +static void +rte_cryptodev_sym_session_init(struct rte_mempool *mp, + void *opaque_arg, + void *_sess, + __rte_unused unsigned i) +{ + struct rte_cryptodev_sym_session *sess = _sess; + struct rte_cryptodev *dev = opaque_arg; + + memset(sess, 0, mp->elt_size); + + sess->dev_id = dev->data->dev_id; + sess->dev_type = dev->dev_type; + sess->mp = mp; + + if (dev->dev_ops->session_initialize) + (*dev->dev_ops->session_initialize)(mp, sess->_private); +} + +static int +rte_cryptodev_sym_session_pool_create(struct rte_cryptodev *dev, + unsigned nb_objs, unsigned obj_cache_size, int socket_id) +{ + char mp_name[RTE_CRYPTODEV_NAME_MAX_LEN]; + unsigned priv_sess_size; + + unsigned n = snprintf(mp_name, sizeof(mp_name), "cdev_%d_sess_mp", + dev->data->dev_id); + if (n > sizeof(mp_name)) { + CDEV_LOG_ERR("Unable to create unique name for session mempool"); + return -ENOMEM; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_get_size, -ENOTSUP); + priv_sess_size = (*dev->dev_ops->session_get_size)(dev); + if (priv_sess_size == 0) { + CDEV_LOG_ERR("%s returned and invalid private session size ", + dev->data->name); + return -ENOMEM; + } + + unsigned elt_size = sizeof(struct rte_cryptodev_sym_session) + + priv_sess_size; + + dev->data->session_pool = rte_mempool_lookup(mp_name); + if (dev->data->session_pool != NULL) { + if ((dev->data->session_pool->elt_size != elt_size) || + (dev->data->session_pool->cache_size < + obj_cache_size) || + (dev->data->session_pool->size < nb_objs)) { + + CDEV_LOG_ERR("%s mempool already exists with different" + " initialization parameters", mp_name); + dev->data->session_pool = NULL; + return -ENOMEM; + } + } else { + dev->data->session_pool = rte_mempool_create( + mp_name, /* mempool name */ + nb_objs, /* number of elements*/ + elt_size, /* element size*/ + obj_cache_size, /* Cache size*/ + 0, /* private data size */ + NULL, /* obj initialization constructor */ + NULL, /* obj initialization constructor arg */ + rte_cryptodev_sym_session_init, + /**< obj constructor*/ + dev, /* obj constructor arg */ + socket_id, /* socket id */ + 0); /* flags */ + + if (dev->data->session_pool == NULL) { + CDEV_LOG_ERR("%s mempool allocation failed", mp_name); + return -ENOMEM; + } + } + + CDEV_LOG_DEBUG("%s mempool created!", mp_name); + return 0; +} + +struct rte_cryptodev_sym_session * +rte_cryptodev_sym_session_create(uint8_t dev_id, + struct rte_crypto_sym_xform *xform) +{ + struct rte_cryptodev *dev; + struct rte_cryptodev_sym_session *sess; + void *_sess; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); + return NULL; + } + + dev = &rte_crypto_devices[dev_id]; + + /* Allocate a session structure from the session pool */ + if (rte_mempool_get(dev->data->session_pool, &_sess)) { + CDEV_LOG_ERR("Couldn't get object from session mempool"); + return NULL; + } + + sess = (struct rte_cryptodev_sym_session *)_sess; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_configure, NULL); + if (dev->dev_ops->session_configure(dev, xform, sess->_private) == + NULL) { + CDEV_LOG_ERR("dev_id %d failed to configure session details", + dev_id); + + /* Return session to mempool */ + rte_mempool_put(sess->mp, _sess); + return NULL; + } + + return sess; +} + +struct rte_cryptodev_sym_session * +rte_cryptodev_sym_session_free(uint8_t dev_id, + struct rte_cryptodev_sym_session *sess) +{ + struct rte_cryptodev *dev; + + if (!rte_cryptodev_pmd_is_valid_dev(dev_id)) { + CDEV_LOG_ERR("Invalid dev_id=%d", dev_id); + return sess; + } + + dev = &rte_crypto_devices[dev_id]; + + /* Check the session belongs to this device type */ + if (sess->dev_type != dev->dev_type) + return sess; + + /* Let device implementation clear session material */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->session_clear, sess); + dev->dev_ops->session_clear(dev, (void *)sess->_private); + + /* Return session to mempool */ + rte_mempool_put(sess->mp, (void *)sess); + + return NULL; +} + +/** Initialise rte_crypto_op mempool element */ +static void +rte_crypto_op_init(struct rte_mempool *mempool, + void *opaque_arg, + void *_op_data, + __rte_unused unsigned i) +{ + struct rte_crypto_op *op = _op_data; + enum rte_crypto_op_type type = *(enum rte_crypto_op_type *)opaque_arg; + + memset(_op_data, 0, mempool->elt_size); + + __rte_crypto_op_reset(op, type); + + op->phys_addr = rte_mem_virt2phy(_op_data); + op->mempool = mempool; +} + + +struct rte_mempool * +rte_crypto_op_pool_create(const char *name, enum rte_crypto_op_type type, + unsigned nb_elts, unsigned cache_size, uint16_t priv_size, + int socket_id) +{ + struct rte_crypto_op_pool_private *priv; + + unsigned elt_size = sizeof(struct rte_crypto_op) + + sizeof(struct rte_crypto_sym_op) + + priv_size; + + /* lookup mempool in case already allocated */ + struct rte_mempool *mp = rte_mempool_lookup(name); + + if (mp != NULL) { + priv = (struct rte_crypto_op_pool_private *) + rte_mempool_get_priv(mp); + + if (mp->elt_size != elt_size || + mp->cache_size < cache_size || + mp->size < nb_elts || + priv->priv_size < priv_size) { + mp = NULL; + CDEV_LOG_ERR("Mempool %s already exists but with " + "incompatible parameters", name); + return NULL; + } + return mp; + } + + mp = rte_mempool_create( + name, + nb_elts, + elt_size, + cache_size, + sizeof(struct rte_crypto_op_pool_private), + NULL, + NULL, + rte_crypto_op_init, + &type, + socket_id, + 0); + + if (mp == NULL) { + CDEV_LOG_ERR("Failed to create mempool %s", name); + return NULL; + } + + priv = (struct rte_crypto_op_pool_private *) + rte_mempool_get_priv(mp); + + priv->priv_size = priv_size; + priv->type = type; + + return mp; +} diff --git a/lib/librte_cryptodev/rte_cryptodev.h b/lib/librte_cryptodev/rte_cryptodev.h new file mode 100644 index 00000000..d47f1e87 --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev.h @@ -0,0 +1,896 @@ +/*- + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTODEV_H_ +#define _RTE_CRYPTODEV_H_ + +/** + * @file rte_cryptodev.h + * + * RTE Cryptographic Device APIs + * + * Defines RTE Crypto Device APIs for the provisioning of cipher and + * authentication operations. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "rte_kvargs.h" +#include "rte_crypto.h" +#include "rte_dev.h" + +#define CRYPTODEV_NAME_NULL_PMD ("cryptodev_null_pmd") +/**< Null crypto PMD device name */ +#define CRYPTODEV_NAME_AESNI_MB_PMD ("cryptodev_aesni_mb_pmd") +/**< AES-NI Multi buffer PMD device name */ +#define CRYPTODEV_NAME_AESNI_GCM_PMD ("cryptodev_aesni_gcm_pmd") +/**< AES-NI GCM PMD device name */ +#define CRYPTODEV_NAME_QAT_SYM_PMD ("cryptodev_qat_sym_pmd") +/**< Intel QAT Symmetric Crypto PMD device name */ +#define CRYPTODEV_NAME_SNOW3G_PMD ("cryptodev_snow3g_pmd") +/**< SNOW 3G PMD device name */ + +/** Crypto device type */ +enum rte_cryptodev_type { + RTE_CRYPTODEV_NULL_PMD = 1, /**< Null crypto PMD */ + RTE_CRYPTODEV_AESNI_GCM_PMD, /**< AES-NI GCM PMD */ + RTE_CRYPTODEV_AESNI_MB_PMD, /**< AES-NI multi buffer PMD */ + RTE_CRYPTODEV_QAT_SYM_PMD, /**< QAT PMD Symmetric Crypto */ + RTE_CRYPTODEV_SNOW3G_PMD, /**< SNOW 3G PMD */ +}; + +extern const char **rte_cyptodev_names; + +/* Logging Macros */ + +#define CDEV_LOG_ERR(fmt, args...) \ + RTE_LOG(ERR, CRYPTODEV, "%s() line %u: " fmt "\n", \ + __func__, __LINE__, ## args) + +#define CDEV_PMD_LOG_ERR(dev, fmt, args...) \ + RTE_LOG(ERR, CRYPTODEV, "[%s] %s() line %u: " fmt "\n", \ + dev, __func__, __LINE__, ## args) + +#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG +#define CDEV_LOG_DEBUG(fmt, args...) \ + RTE_LOG(DEBUG, CRYPTODEV, "%s() line %u: " fmt "\n", \ + __func__, __LINE__, ## args) \ + +#define CDEV_PMD_TRACE(fmt, args...) \ + RTE_LOG(DEBUG, CRYPTODEV, "[%s] %s: " fmt "\n", \ + dev, __func__, ## args) + +#else +#define CDEV_LOG_DEBUG(fmt, args...) +#define CDEV_PMD_TRACE(fmt, args...) +#endif + +/** + * Symmetric Crypto Capability + */ +struct rte_cryptodev_symmetric_capability { + enum rte_crypto_sym_xform_type xform_type; + /**< Transform type : Authentication / Cipher */ + union { + struct { + enum rte_crypto_auth_algorithm algo; + /**< authentication algorithm */ + uint16_t block_size; + /**< algorithm block size */ + struct { + uint16_t min; /**< minimum key size */ + uint16_t max; /**< maximum key size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum */ + } key_size; + /**< auth key size range */ + struct { + uint16_t min; /**< minimum digest size */ + uint16_t max; /**< maximum digest size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum */ + } digest_size; + /**< digest size range */ + struct { + uint16_t min; /**< minimum aad size */ + uint16_t max; /**< maximum aad size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum */ + } aad_size; + /**< Additional authentication data size range */ + } auth; + /**< Symmetric Authentication transform capabilities */ + struct { + enum rte_crypto_cipher_algorithm algo; + /**< cipher algorithm */ + uint16_t block_size; + /**< algorithm block size */ + struct { + uint16_t min; /**< minimum key size */ + uint16_t max; /**< maximum key size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum */ + } key_size; + /**< cipher key size range */ + struct { + uint16_t min; /**< minimum iv size */ + uint16_t max; /**< maximum iv size */ + uint16_t increment; + /**< if a range of sizes are supported, + * this parameter is used to indicate + * increments in byte size that are supported + * between the minimum and maximum */ + } iv_size; + /**< Initialisation vector data size range */ + } cipher; + /**< Symmetric Cipher transform capabilities */ + }; +}; + +/** Structure used to capture a capability of a crypto device */ +struct rte_cryptodev_capabilities { + enum rte_crypto_op_type op; + /**< Operation type */ + + union { + struct rte_cryptodev_symmetric_capability sym; + /**< Symmetric operation capability parameters */ + }; +}; + +/** Macro used at end of crypto PMD list */ +#define RTE_CRYPTODEV_END_OF_CAPABILITIES_LIST() \ + { RTE_CRYPTO_OP_TYPE_UNDEFINED } + + +/** + * Crypto device supported feature flags + * + * Note: + * New features flags should be added to the end of the list + * + * Keep these flags synchronised with rte_cryptodev_get_feature_name() + */ +#define RTE_CRYPTODEV_FF_SYMMETRIC_CRYPTO (1ULL << 0) +/**< Symmetric crypto operations are supported */ +#define RTE_CRYPTODEV_FF_ASYMMETRIC_CRYPTO (1ULL << 1) +/**< Asymmetric crypto operations are supported */ +#define RTE_CRYPTODEV_FF_SYM_OPERATION_CHAINING (1ULL << 2) +/**< Chaining symmetric crypto operations are supported */ +#define RTE_CRYPTODEV_FF_CPU_SSE (1ULL << 3) +/**< Utilises CPU SIMD SSE instructions */ +#define RTE_CRYPTODEV_FF_CPU_AVX (1ULL << 4) +/**< Utilises CPU SIMD AVX instructions */ +#define RTE_CRYPTODEV_FF_CPU_AVX2 (1ULL << 5) +/**< Utilises CPU SIMD AVX2 instructions */ +#define RTE_CRYPTODEV_FF_CPU_AESNI (1ULL << 6) +/**< Utilises CPU AES-NI instructions */ +#define RTE_CRYPTODEV_FF_HW_ACCELERATED (1ULL << 7) +/**< Operations are off-loaded to an external hardware accelerator */ + + +/** + * Get the name of a crypto device feature flag + * + * @param flag The mask describing the flag. + * + * @return + * The name of this flag, or NULL if it's not a valid feature flag. + */ + +extern const char * +rte_cryptodev_get_feature_name(uint64_t flag); + +/** Crypto device information */ +struct rte_cryptodev_info { + const char *driver_name; /**< Driver name. */ + enum rte_cryptodev_type dev_type; /**< Device type */ + struct rte_pci_device *pci_dev; /**< PCI information. */ + + uint64_t feature_flags; /**< Feature flags */ + + const struct rte_cryptodev_capabilities *capabilities; + /**< Array of devices supported capabilities */ + + unsigned max_nb_queue_pairs; + /**< Maximum number of queues pairs supported by device. */ + + struct { + unsigned max_nb_sessions; + /**< Maximum number of sessions supported by device. */ + } sym; +}; + +#define RTE_CRYPTODEV_DETACHED (0) +#define RTE_CRYPTODEV_ATTACHED (1) + +/** Definitions of Crypto device event types */ +enum rte_cryptodev_event_type { + RTE_CRYPTODEV_EVENT_UNKNOWN, /**< unknown event type */ + RTE_CRYPTODEV_EVENT_ERROR, /**< error interrupt event */ + RTE_CRYPTODEV_EVENT_MAX /**< max value of this enum */ +}; + +/** Crypto device queue pair configuration structure. */ +struct rte_cryptodev_qp_conf { + uint32_t nb_descriptors; /**< Number of descriptors per queue pair */ +}; + +/** + * Typedef for application callback function to be registered by application + * software for notification of device events + * + * @param dev_id Crypto device identifier + * @param event Crypto device event to register for notification of. + * @param cb_arg User specified parameter to be passed as to passed to + * users callback function. + */ +typedef void (*rte_cryptodev_cb_fn)(uint8_t dev_id, + enum rte_cryptodev_event_type event, void *cb_arg); + + +/** Crypto Device statistics */ +struct rte_cryptodev_stats { + uint64_t enqueued_count; + /**< Count of all operations enqueued */ + uint64_t dequeued_count; + /**< Count of all operations dequeued */ + + uint64_t enqueue_err_count; + /**< Total error count on operations enqueued */ + uint64_t dequeue_err_count; + /**< Total error count on operations dequeued */ +}; + +#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_QUEUE_PAIRS 8 +#define RTE_CRYPTODEV_VDEV_DEFAULT_MAX_NB_SESSIONS 2048 + +/** + * @internal + * Initialisation parameters for virtual crypto devices + */ +struct rte_crypto_vdev_init_params { + unsigned max_nb_queue_pairs; + unsigned max_nb_sessions; + uint8_t socket_id; +}; + +#define RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG ("max_nb_queue_pairs") +#define RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG ("max_nb_sessions") +#define RTE_CRYPTODEV_VDEV_SOCKET_ID ("socket_id") + +static const char *cryptodev_vdev_valid_params[] = { + RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, + RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, + RTE_CRYPTODEV_VDEV_SOCKET_ID +}; + +static inline uint8_t +number_of_sockets(void) +{ + int sockets = 0; + int i; + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + + for (i = 0; ((i < RTE_MAX_MEMSEG) && (ms[i].addr != NULL)); i++) { + if (sockets < ms[i].socket_id) + sockets = ms[i].socket_id; + } + + /* Number of sockets = maximum socket_id + 1 */ + return ++sockets; +} + +/** Parse integer from integer argument */ +static inline int +__rte_cryptodev_parse_integer_arg(const char *key __rte_unused, + const char *value, void *extra_args) +{ + int *i = (int *) extra_args; + + *i = atoi(value); + if (*i < 0) { + CDEV_LOG_ERR("Argument has to be positive."); + return -1; + } + + return 0; +} + +/** + * Parse virtual device initialisation parameters input arguments + * @internal + * + * @params params Initialisation parameters with defaults set. + * @params input_args Command line arguments + * + * @return + * 0 on successful parse + * <0 on failure to parse + */ +static inline int +rte_cryptodev_parse_vdev_init_params(struct rte_crypto_vdev_init_params *params, + const char *input_args) +{ + struct rte_kvargs *kvlist; + int ret; + + if (params == NULL) + return -EINVAL; + + if (input_args) { + kvlist = rte_kvargs_parse(input_args, + cryptodev_vdev_valid_params); + if (kvlist == NULL) + return -1; + + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_VDEV_MAX_NB_QP_ARG, + &__rte_cryptodev_parse_integer_arg, + ¶ms->max_nb_queue_pairs); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, + RTE_CRYPTODEV_VDEV_MAX_NB_SESS_ARG, + &__rte_cryptodev_parse_integer_arg, + ¶ms->max_nb_sessions); + if (ret < 0) + goto free_kvlist; + + ret = rte_kvargs_process(kvlist, RTE_CRYPTODEV_VDEV_SOCKET_ID, + &__rte_cryptodev_parse_integer_arg, + ¶ms->socket_id); + if (ret < 0) + goto free_kvlist; + + if (params->socket_id >= number_of_sockets()) { + CDEV_LOG_ERR("Invalid socket id specified to create " + "the virtual crypto device on"); + goto free_kvlist; + } + } + + return 0; + +free_kvlist: + rte_kvargs_free(kvlist); + return ret; +} + +/** + * Create a virtual crypto device + * + * @param name Cryptodev PMD name of device to be created. + * @param args Options arguments for device. + * + * @return + * - On successful creation of the cryptodev the device index is returned, + * which will be between 0 and rte_cryptodev_count(). + * - In the case of a failure, returns -1. + */ +extern int +rte_cryptodev_create_vdev(const char *name, const char *args); + +/** + * Get the device identifier for the named crypto device. + * + * @param name device name to select the device structure. + * + * @return + * - Returns crypto device identifier on success. + * - Return -1 on failure to find named crypto device. + */ +extern int +rte_cryptodev_get_dev_id(const char *name); + +/** + * Get the total number of crypto devices that have been successfully + * initialised. + * + * @return + * - The total number of usable crypto devices. + */ +extern uint8_t +rte_cryptodev_count(void); + +extern uint8_t +rte_cryptodev_count_devtype(enum rte_cryptodev_type type); +/* + * Return the NUMA socket to which a device is connected + * + * @param dev_id + * The identifier of the device + * @return + * The NUMA socket id to which the device is connected or + * a default of zero if the socket could not be determined. + * -1 if returned is the dev_id value is out of range. + */ +extern int +rte_cryptodev_socket_id(uint8_t dev_id); + +/** Crypto device configuration structure */ +struct rte_cryptodev_config { + int socket_id; /**< Socket to allocate resources on */ + uint16_t nb_queue_pairs; + /**< Number of queue pairs to configure on device */ + + struct { + uint32_t nb_objs; /**< Number of objects in mempool */ + uint32_t cache_size; /**< l-core object cache size */ + } session_mp; /**< Session mempool configuration */ +}; + +/** + * Configure a device. + * + * This function must be invoked first before any other function in the + * API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param dev_id The identifier of the device to configure. + * @param config The crypto device configuration structure. + * + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +extern int +rte_cryptodev_configure(uint8_t dev_id, struct rte_cryptodev_config *config); + +/** + * Start an device. + * + * The device start step is the last one and consists of setting the configured + * offload features and in starting the transmit and the receive units of the + * device. + * On success, all basic functions exported by the API (link status, + * receive/transmit, and so on) can be invoked. + * + * @param dev_id + * The identifier of the device. + * @return + * - 0: Success, device started. + * - <0: Error code of the driver device start function. + */ +extern int +rte_cryptodev_start(uint8_t dev_id); + +/** + * Stop an device. The device can be restarted with a call to + * rte_cryptodev_start() + * + * @param dev_id The identifier of the device. + */ +extern void +rte_cryptodev_stop(uint8_t dev_id); + +/** + * Close an device. The device cannot be restarted! + * + * @param dev_id The identifier of the device. + * + * @return + * - 0 on successfully closing device + * - <0 on failure to close device + */ +extern int +rte_cryptodev_close(uint8_t dev_id); + +/** + * Allocate and set up a receive queue pair for a device. + * + * + * @param dev_id The identifier of the device. + * @param queue_pair_id The index of the queue pairs to set up. The + * value must be in the range [0, nb_queue_pair + * - 1] previously supplied to + * rte_cryptodev_configure(). + * @param qp_conf The pointer to the configuration data to be + * used for the queue pair. NULL value is + * allowed, in which case default configuration + * will be used. + * @param socket_id The *socket_id* argument is the socket + * identifier in case of NUMA. The value can be + * *SOCKET_ID_ANY* if there is no NUMA constraint + * for the DMA memory allocated for the receive + * queue pair. + * + * @return + * - 0: Success, queue pair correctly set up. + * - <0: Queue pair configuration failed + */ +extern int +rte_cryptodev_queue_pair_setup(uint8_t dev_id, uint16_t queue_pair_id, + const struct rte_cryptodev_qp_conf *qp_conf, int socket_id); + +/** + * Start a specified queue pair of a device. It is used + * when deferred_start flag of the specified queue is true. + * + * @param dev_id The identifier of the device + * @param queue_pair_id The index of the queue pair to start. The value + * must be in the range [0, nb_queue_pair - 1] + * previously supplied to + * rte_crypto_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The dev_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int +rte_cryptodev_queue_pair_start(uint8_t dev_id, uint16_t queue_pair_id); + +/** + * Stop specified queue pair of a device + * + * @param dev_id The identifier of the device + * @param queue_pair_id The index of the queue pair to stop. The value + * must be in the range [0, nb_queue_pair - 1] + * previously supplied to + * rte_cryptodev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The dev_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +extern int +rte_cryptodev_queue_pair_stop(uint8_t dev_id, uint16_t queue_pair_id); + +/** + * Get the number of queue pairs on a specific crypto device + * + * @param dev_id Crypto device identifier. + * @return + * - The number of configured queue pairs. + */ +extern uint16_t +rte_cryptodev_queue_pair_count(uint8_t dev_id); + + +/** + * Retrieve the general I/O statistics of a device. + * + * @param dev_id The identifier of the device. + * @param stats A pointer to a structure of type + * *rte_cryptodev_stats* to be filled with the + * values of device counters. + * @return + * - Zero if successful. + * - Non-zero otherwise. + */ +extern int +rte_cryptodev_stats_get(uint8_t dev_id, struct rte_cryptodev_stats *stats); + +/** + * Reset the general I/O statistics of a device. + * + * @param dev_id The identifier of the device. + */ +extern void +rte_cryptodev_stats_reset(uint8_t dev_id); + +/** + * Retrieve the contextual information of a device. + * + * @param dev_id The identifier of the device. + * @param dev_info A pointer to a structure of type + * *rte_cryptodev_info* to be filled with the + * contextual information of the device. + */ +extern void +rte_cryptodev_info_get(uint8_t dev_id, struct rte_cryptodev_info *dev_info); + + +/** + * Register a callback function for specific device id. + * + * @param dev_id Device id. + * @param event Event interested. + * @param cb_fn User supplied callback function to be called. + * @param cb_arg Pointer to the parameters for the registered + * callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +extern int +rte_cryptodev_callback_register(uint8_t dev_id, + enum rte_cryptodev_event_type event, + rte_cryptodev_cb_fn cb_fn, void *cb_arg); + +/** + * Unregister a callback function for specific device id. + * + * @param dev_id The device identifier. + * @param event Event interested. + * @param cb_fn User supplied callback function to be called. + * @param cb_arg Pointer to the parameters for the registered + * callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +extern int +rte_cryptodev_callback_unregister(uint8_t dev_id, + enum rte_cryptodev_event_type event, + rte_cryptodev_cb_fn cb_fn, void *cb_arg); + + +typedef uint16_t (*dequeue_pkt_burst_t)(void *qp, + struct rte_crypto_op **ops, uint16_t nb_ops); +/**< Dequeue processed packets from queue pair of a device. */ + +typedef uint16_t (*enqueue_pkt_burst_t)(void *qp, + struct rte_crypto_op **ops, uint16_t nb_ops); +/**< Enqueue packets for processing on queue pair of a device. */ + + + + +struct rte_cryptodev_callback; + +/** Structure to keep track of registered callbacks */ +TAILQ_HEAD(rte_cryptodev_cb_list, rte_cryptodev_callback); + +/** The data structure associated with each crypto device. */ +struct rte_cryptodev { + dequeue_pkt_burst_t dequeue_burst; + /**< Pointer to PMD receive function. */ + enqueue_pkt_burst_t enqueue_burst; + /**< Pointer to PMD transmit function. */ + + const struct rte_cryptodev_driver *driver; + /**< Driver for this device */ + struct rte_cryptodev_data *data; + /**< Pointer to device data */ + struct rte_cryptodev_ops *dev_ops; + /**< Functions exported by PMD */ + uint64_t feature_flags; + /**< Supported features */ + struct rte_pci_device *pci_dev; + /**< PCI info. supplied by probing */ + + enum rte_cryptodev_type dev_type; + /**< Crypto device type */ + enum pmd_type pmd_type; + /**< PMD type - PDEV / VDEV */ + + struct rte_cryptodev_cb_list link_intr_cbs; + /**< User application callback for interrupts if present */ + + uint8_t attached : 1; + /**< Flag indicating the device is attached */ +} __rte_cache_aligned; + + +#define RTE_CRYPTODEV_NAME_MAX_LEN (64) +/**< Max length of name of crypto PMD */ + +/** + * + * The data part, with no function pointers, associated with each device. + * + * This structure is safe to place in shared memory to be common among + * different processes in a multi-process configuration. + */ +struct rte_cryptodev_data { + uint8_t dev_id; + /**< Device ID for this instance */ + uint8_t socket_id; + /**< Socket ID where memory is allocated */ + char name[RTE_CRYPTODEV_NAME_MAX_LEN]; + /**< Unique identifier name */ + + uint8_t dev_started : 1; + /**< Device state: STARTED(1)/STOPPED(0) */ + + struct rte_mempool *session_pool; + /**< Session memory pool */ + void **queue_pairs; + /**< Array of pointers to queue pairs. */ + uint16_t nb_queue_pairs; + /**< Number of device queue pairs. */ + + void *dev_private; + /**< PMD-specific private data */ +} __rte_cache_aligned; + +extern struct rte_cryptodev *rte_cryptodevs; +/** + * + * Dequeue a burst of processed crypto operations from a queue on the crypto + * device. The dequeued operation are stored in *rte_crypto_op* structures + * whose pointers are supplied in the *ops* array. + * + * The rte_cryptodev_dequeue_burst() function returns the number of ops + * actually dequeued, which is the number of *rte_crypto_op* data structures + * effectively supplied into the *ops* array. + * + * A return value equal to *nb_ops* indicates that the queue contained + * at least *nb_ops* operations, and this is likely to signify that other + * processed operations remain in the devices output queue. Applications + * implementing a "retrieve as many processed operations as possible" policy + * can check this specific case and keep invoking the + * rte_cryptodev_dequeue_burst() function until a value less than + * *nb_ops* is returned. + * + * The rte_cryptodev_dequeue_burst() function does not provide any error + * notification to avoid the corresponding overhead. + * + * @param dev_id The symmetric crypto device identifier + * @param qp_id The index of the queue pair from which to + * retrieve processed packets. The value must be + * in the range [0, nb_queue_pair - 1] previously + * supplied to rte_cryptodev_configure(). + * @param ops The address of an array of pointers to + * *rte_crypto_op* structures that must be + * large enough to store *nb_ops* pointers in it. + * @param nb_ops The maximum number of operations to dequeue. + * + * @return + * - The number of operations actually dequeued, which is the number + * of pointers to *rte_crypto_op* structures effectively supplied to the + * *ops* array. + */ +static inline uint16_t +rte_cryptodev_dequeue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + struct rte_cryptodev *dev = &rte_cryptodevs[dev_id]; + + nb_ops = (*dev->dequeue_burst) + (dev->data->queue_pairs[qp_id], ops, nb_ops); + + return nb_ops; +} + +/** + * Enqueue a burst of operations for processing on a crypto device. + * + * The rte_cryptodev_enqueue_burst() function is invoked to place + * crypto operations on the queue *qp_id* of the device designated by + * its *dev_id*. + * + * The *nb_ops* parameter is the number of operations to process which are + * supplied in the *ops* array of *rte_crypto_op* structures. + * + * The rte_cryptodev_enqueue_burst() function returns the number of + * operations it actually enqueued for processing. A return value equal to + * *nb_ops* means that all packets have been enqueued. + * + * @param dev_id The identifier of the device. + * @param qp_id The index of the queue pair which packets are + * to be enqueued for processing. The value + * must be in the range [0, nb_queue_pairs - 1] + * previously supplied to + * *rte_cryptodev_configure*. + * @param ops The address of an array of *nb_ops* pointers + * to *rte_crypto_op* structures which contain + * the crypto operations to be processed. + * @param nb_ops The number of operations to process. + * + * @return + * The number of operations actually enqueued on the crypto device. The return + * value can be less than the value of the *nb_ops* parameter when the + * crypto devices queue is full or if invalid parameters are specified in + * a *rte_crypto_op*. + */ +static inline uint16_t +rte_cryptodev_enqueue_burst(uint8_t dev_id, uint16_t qp_id, + struct rte_crypto_op **ops, uint16_t nb_ops) +{ + struct rte_cryptodev *dev = &rte_cryptodevs[dev_id]; + + return (*dev->enqueue_burst)( + dev->data->queue_pairs[qp_id], ops, nb_ops); +} + + +/** Cryptodev symmetric crypto session */ +struct rte_cryptodev_sym_session { + struct { + uint8_t dev_id; + /**< Device Id */ + enum rte_cryptodev_type dev_type; + /** Crypto Device type session created on */ + struct rte_mempool *mp; + /**< Mempool session allocated from */ + } __rte_aligned(8); + /**< Public symmetric session details */ + + char _private[0]; + /**< Private session material */ +}; + + +/** + * Initialise a session for symmetric cryptographic operations. + * + * This function is used by the client to initialize immutable + * parameters of symmetric cryptographic operation. + * To perform the operation the rte_cryptodev_enqueue_burst function is + * used. Each mbuf should contain a reference to the session + * pointer returned from this function contained within it's crypto_op if a + * session-based operation is being provisioned. Memory to contain the session + * information is allocated from within mempool managed by the cryptodev. + * + * The rte_cryptodev_session_free must be called to free allocated + * memory when the session is no longer required. + * + * @param dev_id The device identifier. + * @param xform Crypto transform chain. + + * + * @return + * Pointer to the created session or NULL + */ +extern struct rte_cryptodev_sym_session * +rte_cryptodev_sym_session_create(uint8_t dev_id, + struct rte_crypto_sym_xform *xform); + +/** + * Free the memory associated with a previously allocated session. + * + * @param dev_id The device identifier. + * @param session Session pointer previously allocated by + * *rte_cryptodev_sym_session_create*. + * + * @return + * NULL on successful freeing of session. + * Session pointer on failure to free session. + */ +extern struct rte_cryptodev_sym_session * +rte_cryptodev_sym_session_free(uint8_t dev_id, + struct rte_cryptodev_sym_session *session); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRYPTODEV_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_pmd.h b/lib/librte_cryptodev/rte_cryptodev_pmd.h new file mode 100644 index 00000000..7d049ea3 --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev_pmd.h @@ -0,0 +1,543 @@ +/*- + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRYPTODEV_PMD_H_ +#define _RTE_CRYPTODEV_PMD_H_ + +/** @file + * RTE Crypto PMD APIs + * + * @note + * These API are from crypto PMD only and user applications should not call + * them directly. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "rte_crypto.h" +#include "rte_cryptodev.h" + + +#ifdef RTE_LIBRTE_CRYPTODEV_DEBUG +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(fmt, args...) +#endif + +struct rte_cryptodev_session { + struct { + uint8_t dev_id; + enum rte_cryptodev_type type; + struct rte_mempool *mp; + } __rte_aligned(8); + + char _private[0]; +}; + +struct rte_cryptodev_driver; + +/** + * Initialisation function of a crypto driver invoked for each matching + * crypto PCI device detected during the PCI probing phase. + * + * @param drv The pointer to the [matching] crypto driver structure + * supplied by the PMD when it registered itself. + * @param dev The dev pointer is the address of the *rte_cryptodev* + * structure associated with the matching device and which + * has been [automatically] allocated in the + * *rte_crypto_devices* array. + * + * @return + * - 0: Success, the device is properly initialised by the driver. + * In particular, the driver MUST have set up the *dev_ops* pointer + * of the *dev* structure. + * - <0: Error code of the device initialisation failure. + */ +typedef int (*cryptodev_init_t)(struct rte_cryptodev_driver *drv, + struct rte_cryptodev *dev); + +/** + * Finalisation function of a driver invoked for each matching + * PCI device detected during the PCI closing phase. + * + * @param drv The pointer to the [matching] driver structure supplied + * by the PMD when it registered itself. + * @param dev The dev pointer is the address of the *rte_cryptodev* + * structure associated with the matching device and which + * has been [automatically] allocated in the + * *rte_crypto_devices* array. + * + * * @return + * - 0: Success, the device is properly finalised by the driver. + * In particular, the driver MUST free the *dev_ops* pointer + * of the *dev* structure. + * - <0: Error code of the device initialisation failure. + */ +typedef int (*cryptodev_uninit_t)(const struct rte_cryptodev_driver *drv, + struct rte_cryptodev *dev); + +/** + * The structure associated with a PMD driver. + * + * Each driver acts as a PCI driver and is represented by a generic + * *crypto_driver* structure that holds: + * + * - An *rte_pci_driver* structure (which must be the first field). + * + * - The *cryptodev_init* function invoked for each matching PCI device. + * + * - The size of the private data to allocate for each matching device. + */ +struct rte_cryptodev_driver { + struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ + unsigned dev_private_size; /**< Size of device private data. */ + + cryptodev_init_t cryptodev_init; /**< Device init function. */ + cryptodev_uninit_t cryptodev_uninit; /**< Device uninit function. */ +}; + + +/** Global structure used for maintaining state of allocated crypto devices */ +struct rte_cryptodev_global { + struct rte_cryptodev *devs; /**< Device information array */ + struct rte_cryptodev_data *data[RTE_CRYPTO_MAX_DEVS]; + /**< Device private data */ + uint8_t nb_devs; /**< Number of devices found */ + uint8_t max_devs; /**< Max number of devices */ +}; + +/** pointer to global crypto devices data structure. */ +extern struct rte_cryptodev_global *rte_cryptodev_globals; + +/** + * Get the rte_cryptodev structure device pointer for the device. Assumes a + * valid device index. + * + * @param dev_id Device ID value to select the device structure. + * + * @return + * - The rte_cryptodev structure pointer for the given device ID. + */ +static inline struct rte_cryptodev * +rte_cryptodev_pmd_get_dev(uint8_t dev_id) +{ + return &rte_cryptodev_globals->devs[dev_id]; +} + +/** + * Get the rte_cryptodev structure device pointer for the named device. + * + * @param name device name to select the device structure. + * + * @return + * - The rte_cryptodev structure pointer for the given device ID. + */ +static inline struct rte_cryptodev * +rte_cryptodev_pmd_get_named_dev(const char *name) +{ + struct rte_cryptodev *dev; + unsigned i; + + if (name == NULL) + return NULL; + + for (i = 0, dev = &rte_cryptodev_globals->devs[i]; + i < rte_cryptodev_globals->max_devs; i++) { + if ((dev->attached == RTE_CRYPTODEV_ATTACHED) && + (strcmp(dev->data->name, name) == 0)) + return dev; + } + + return NULL; +} + +/** + * Validate if the crypto device index is valid attached crypto device. + * + * @param dev_id Crypto device index. + * + * @return + * - If the device index is valid (1) or not (0). + */ +static inline unsigned +rte_cryptodev_pmd_is_valid_dev(uint8_t dev_id) +{ + struct rte_cryptodev *dev = NULL; + + if (dev_id >= rte_cryptodev_globals->nb_devs) + return 0; + + dev = rte_cryptodev_pmd_get_dev(dev_id); + if (dev->attached != RTE_CRYPTODEV_ATTACHED) + return 0; + else + return 1; +} + +/** + * The pool of rte_cryptodev structures. + */ +extern struct rte_cryptodev *rte_cryptodevs; + + +/** + * Definitions of all functions exported by a driver through the + * the generic structure of type *crypto_dev_ops* supplied in the + * *rte_cryptodev* structure associated with a device. + */ + +/** + * Function used to configure device. + * + * @param dev Crypto device pointer + * + * @return Returns 0 on success + */ +typedef int (*cryptodev_configure_t)(struct rte_cryptodev *dev); + +/** + * Function used to start a configured device. + * + * @param dev Crypto device pointer + * + * @return Returns 0 on success + */ +typedef int (*cryptodev_start_t)(struct rte_cryptodev *dev); + +/** + * Function used to stop a configured device. + * + * @param dev Crypto device pointer + */ +typedef void (*cryptodev_stop_t)(struct rte_cryptodev *dev); + +/** + * Function used to close a configured device. + * + * @param dev Crypto device pointer + * @return + * - 0 on success. + * - EAGAIN if can't close as device is busy + */ +typedef int (*cryptodev_close_t)(struct rte_cryptodev *dev); + + +/** + * Function used to get statistics of a device. + * + * @param dev Crypto device pointer + * @param stats Pointer to crypto device stats structure to populate + */ +typedef void (*cryptodev_stats_get_t)(struct rte_cryptodev *dev, + struct rte_cryptodev_stats *stats); + + +/** + * Function used to reset statistics of a device. + * + * @param dev Crypto device pointer + */ +typedef void (*cryptodev_stats_reset_t)(struct rte_cryptodev *dev); + + +/** + * Function used to get specific information of a device. + * + * @param dev Crypto device pointer + */ +typedef void (*cryptodev_info_get_t)(struct rte_cryptodev *dev, + struct rte_cryptodev_info *dev_info); + +/** + * Start queue pair of a device. + * + * @param dev Crypto device pointer + * @param qp_id Queue Pair Index + * + * @return Returns 0 on success. + */ +typedef int (*cryptodev_queue_pair_start_t)(struct rte_cryptodev *dev, + uint16_t qp_id); + +/** + * Stop queue pair of a device. + * + * @param dev Crypto device pointer + * @param qp_id Queue Pair Index + * + * @return Returns 0 on success. + */ +typedef int (*cryptodev_queue_pair_stop_t)(struct rte_cryptodev *dev, + uint16_t qp_id); + +/** + * Setup a queue pair for a device. + * + * @param dev Crypto device pointer + * @param qp_id Queue Pair Index + * @param qp_conf Queue configuration structure + * @param socket_id Socket Index + * + * @return Returns 0 on success. + */ +typedef int (*cryptodev_queue_pair_setup_t)(struct rte_cryptodev *dev, + uint16_t qp_id, const struct rte_cryptodev_qp_conf *qp_conf, + int socket_id); + +/** + * Release memory resources allocated by given queue pair. + * + * @param dev Crypto device pointer + * @param qp_id Queue Pair Index + * + * @return + * - 0 on success. + * - EAGAIN if can't close as device is busy + */ +typedef int (*cryptodev_queue_pair_release_t)(struct rte_cryptodev *dev, + uint16_t qp_id); + +/** + * Get number of available queue pairs of a device. + * + * @param dev Crypto device pointer + * + * @return Returns number of queue pairs on success. + */ +typedef uint32_t (*cryptodev_queue_pair_count_t)(struct rte_cryptodev *dev); + +/** + * Create a session mempool to allocate sessions from + * + * @param dev Crypto device pointer + * @param nb_objs number of sessions objects in mempool + * @param obj_cache l-core object cache size, see *rte_ring_create* + * @param socket_id Socket Id to allocate mempool on. + * + * @return + * - On success returns a pointer to a rte_mempool + * - On failure returns a NULL pointer + */ +typedef int (*cryptodev_sym_create_session_pool_t)( + struct rte_cryptodev *dev, unsigned nb_objs, + unsigned obj_cache_size, int socket_id); + + +/** + * Get the size of a cryptodev session + * + * @param dev Crypto device pointer + * + * @return + * - On success returns the size of the session structure for device + * - On failure returns 0 + */ +typedef unsigned (*cryptodev_sym_get_session_private_size_t)( + struct rte_cryptodev *dev); + +/** + * Initialize a Crypto session on a device. + * + * @param dev Crypto device pointer + * @param xform Single or chain of crypto xforms + * @param priv_sess Pointer to cryptodev's private session structure + * + * @return + * - Returns private session structure on success. + * - Returns NULL on failure. + */ +typedef void (*cryptodev_sym_initialize_session_t)(struct rte_mempool *mempool, + void *session_private); + +/** + * Configure a Crypto session on a device. + * + * @param dev Crypto device pointer + * @param xform Single or chain of crypto xforms + * @param priv_sess Pointer to cryptodev's private session structure + * + * @return + * - Returns private session structure on success. + * - Returns NULL on failure. + */ +typedef void * (*cryptodev_sym_configure_session_t)(struct rte_cryptodev *dev, + struct rte_crypto_sym_xform *xform, void *session_private); + +/** + * Free Crypto session. + * @param session Cryptodev session structure to free + */ +typedef void (*cryptodev_sym_free_session_t)(struct rte_cryptodev *dev, + void *session_private); + + +/** Crypto device operations function pointer table */ +struct rte_cryptodev_ops { + cryptodev_configure_t dev_configure; /**< Configure device. */ + cryptodev_start_t dev_start; /**< Start device. */ + cryptodev_stop_t dev_stop; /**< Stop device. */ + cryptodev_close_t dev_close; /**< Close device. */ + + cryptodev_info_get_t dev_infos_get; /**< Get device info. */ + + cryptodev_stats_get_t stats_get; + /**< Get device statistics. */ + cryptodev_stats_reset_t stats_reset; + /**< Reset device statistics. */ + + cryptodev_queue_pair_setup_t queue_pair_setup; + /**< Set up a device queue pair. */ + cryptodev_queue_pair_release_t queue_pair_release; + /**< Release a queue pair. */ + cryptodev_queue_pair_start_t queue_pair_start; + /**< Start a queue pair. */ + cryptodev_queue_pair_stop_t queue_pair_stop; + /**< Stop a queue pair. */ + cryptodev_queue_pair_count_t queue_pair_count; + /**< Get count of the queue pairs. */ + + cryptodev_sym_get_session_private_size_t session_get_size; + /**< Return private session. */ + cryptodev_sym_initialize_session_t session_initialize; + /**< Initialization function for private session data */ + cryptodev_sym_configure_session_t session_configure; + /**< Configure a Crypto session. */ + cryptodev_sym_free_session_t session_clear; + /**< Clear a Crypto sessions private data. */ +}; + + +/** + * Function for internal use by dummy drivers primarily, e.g. ring-based + * driver. + * Allocates a new cryptodev slot for an crypto device and returns the pointer + * to that slot for the driver to use. + * + * @param name Unique identifier name for each device + * @param type Device type of this Crypto device + * @param socket_id Socket to allocate resources on. + * @return + * - Slot in the rte_dev_devices array for a new device; + */ +struct rte_cryptodev * +rte_cryptodev_pmd_allocate(const char *name, enum pmd_type type, int socket_id); + +/** + * Creates a new virtual crypto device and returns the pointer + * to that device. + * + * @param name PMD type name + * @param dev_private_size Size of crypto PMDs private data + * @param socket_id Socket to allocate resources on. + * + * @return + * - Cryptodev pointer if device is successfully created. + * - NULL if device cannot be created. + */ +struct rte_cryptodev * +rte_cryptodev_pmd_virtual_dev_init(const char *name, size_t dev_private_size, + int socket_id); + + +/** + * Function for internal use by dummy drivers primarily, e.g. ring-based + * driver. + * Release the specified cryptodev device. + * + * @param cryptodev + * The *cryptodev* pointer is the address of the *rte_cryptodev* structure. + * @return + * - 0 on success, negative on error + */ +extern int +rte_cryptodev_pmd_release_device(struct rte_cryptodev *cryptodev); + + +/** + * Register a Crypto [Poll Mode] driver. + * + * Function invoked by the initialization function of a Crypto driver + * to simultaneously register itself as Crypto Poll Mode Driver and to either: + * + * a - register itself as PCI driver if the crypto device is a physical + * device, by invoking the rte_eal_pci_register() function to + * register the *pci_drv* structure embedded in the *crypto_drv* + * structure, after having stored the address of the + * rte_cryptodev_init() function in the *devinit* field of the + * *pci_drv* structure. + * + * During the PCI probing phase, the rte_cryptodev_init() + * function is invoked for each PCI [device] matching the + * embedded PCI identifiers provided by the driver. + * + * b, complete the initialization sequence if the device is a virtual + * device by calling the rte_cryptodev_init() directly passing a + * NULL parameter for the rte_pci_device structure. + * + * @param crypto_drv crypto_driver structure associated with the crypto + * driver. + * @param type pmd type + */ +extern int +rte_cryptodev_pmd_driver_register(struct rte_cryptodev_driver *crypto_drv, + enum pmd_type type); + +/** + * Executes all the user application registered callbacks for the specific + * device. + * * + * @param dev Pointer to cryptodev struct + * @param event Crypto device interrupt event type. + * + * @return + * void + */ +void rte_cryptodev_pmd_callback_process(struct rte_cryptodev *dev, + enum rte_cryptodev_event_type event); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRYPTODEV_PMD_H_ */ diff --git a/lib/librte_cryptodev/rte_cryptodev_version.map b/lib/librte_cryptodev/rte_cryptodev_version.map new file mode 100644 index 00000000..41004e1c --- /dev/null +++ b/lib/librte_cryptodev/rte_cryptodev_version.map @@ -0,0 +1,34 @@ +DPDK_16.04 { + global: + + rte_cryptodevs; + rte_cryptodev_callback_register; + rte_cryptodev_callback_unregister; + rte_cryptodev_close; + rte_cryptodev_count; + rte_cryptodev_count_devtype; + rte_cryptodev_configure; + rte_cryptodev_create_vdev; + rte_cryptodev_get_dev_id; + rte_cryptodev_get_feature_name; + rte_cryptodev_info_get; + rte_cryptodev_pmd_allocate; + rte_cryptodev_pmd_callback_process; + rte_cryptodev_pmd_driver_register; + rte_cryptodev_pmd_release_device; + rte_cryptodev_pmd_virtual_dev_init; + rte_cryptodev_sym_session_create; + rte_cryptodev_sym_session_free; + rte_cryptodev_socket_id; + rte_cryptodev_start; + rte_cryptodev_stats_get; + rte_cryptodev_stats_reset; + rte_cryptodev_stop; + rte_cryptodev_queue_pair_count; + rte_cryptodev_queue_pair_setup; + rte_cryptodev_queue_pair_start; + rte_cryptodev_queue_pair_stop; + rte_crypto_op_pool_create; + + local: *; +}; diff --git a/lib/librte_distributor/Makefile b/lib/librte_distributor/Makefile new file mode 100644 index 00000000..4c9af172 --- /dev/null +++ b/lib/librte_distributor/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_distributor.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_distributor_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) := rte_distributor.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR)-include := rte_distributor.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_DISTRIBUTOR) += lib/librte_mbuf + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_distributor/rte_distributor.c b/lib/librte_distributor/rte_distributor.c new file mode 100644 index 00000000..f3f778c9 --- /dev/null +++ b/lib/librte_distributor/rte_distributor.c @@ -0,0 +1,487 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rte_distributor.h" + +#define NO_FLAGS 0 +#define RTE_DISTRIB_PREFIX "DT_" + +/* we will use the bottom four bits of pointer for flags, shifting out + * the top four bits to make room (since a 64-bit pointer actually only uses + * 48 bits). An arithmetic-right-shift will then appropriately restore the + * original pointer value with proper sign extension into the top bits. */ +#define RTE_DISTRIB_FLAG_BITS 4 +#define RTE_DISTRIB_FLAGS_MASK (0x0F) +#define RTE_DISTRIB_NO_BUF 0 /**< empty flags: no buffer requested */ +#define RTE_DISTRIB_GET_BUF (1) /**< worker requests a buffer, returns old */ +#define RTE_DISTRIB_RETURN_BUF (2) /**< worker returns a buffer, no request */ + +#define RTE_DISTRIB_BACKLOG_SIZE 8 +#define RTE_DISTRIB_BACKLOG_MASK (RTE_DISTRIB_BACKLOG_SIZE - 1) + +#define RTE_DISTRIB_MAX_RETURNS 128 +#define RTE_DISTRIB_RETURNS_MASK (RTE_DISTRIB_MAX_RETURNS - 1) + +/** + * Maximum number of workers allowed. + * Be aware of increasing the limit, becaus it is limited by how we track + * in-flight tags. See @in_flight_bitmask and @rte_distributor_process + */ +#define RTE_DISTRIB_MAX_WORKERS 64 + +/** + * Buffer structure used to pass the pointer data between cores. This is cache + * line aligned, but to improve performance and prevent adjacent cache-line + * prefetches of buffers for other workers, e.g. when worker 1's buffer is on + * the next cache line to worker 0, we pad this out to three cache lines. + * Only 64-bits of the memory is actually used though. + */ +union rte_distributor_buffer { + volatile int64_t bufptr64; + char pad[RTE_CACHE_LINE_SIZE*3]; +} __rte_cache_aligned; + +struct rte_distributor_backlog { + unsigned start; + unsigned count; + int64_t pkts[RTE_DISTRIB_BACKLOG_SIZE]; +}; + +struct rte_distributor_returned_pkts { + unsigned start; + unsigned count; + struct rte_mbuf *mbufs[RTE_DISTRIB_MAX_RETURNS]; +}; + +struct rte_distributor { + TAILQ_ENTRY(rte_distributor) next; /**< Next in list. */ + + char name[RTE_DISTRIBUTOR_NAMESIZE]; /**< Name of the ring. */ + unsigned num_workers; /**< Number of workers polling */ + + uint32_t in_flight_tags[RTE_DISTRIB_MAX_WORKERS]; + /**< Tracks the tag being processed per core */ + uint64_t in_flight_bitmask; + /**< on/off bits for in-flight tags. + * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 then + * the bitmask has to expand. + */ + + struct rte_distributor_backlog backlog[RTE_DISTRIB_MAX_WORKERS]; + + union rte_distributor_buffer bufs[RTE_DISTRIB_MAX_WORKERS]; + + struct rte_distributor_returned_pkts returns; +}; + +TAILQ_HEAD(rte_distributor_list, rte_distributor); + +static struct rte_tailq_elem rte_distributor_tailq = { + .name = "RTE_DISTRIBUTOR", +}; +EAL_REGISTER_TAILQ(rte_distributor_tailq) + +/**** APIs called by workers ****/ + +void +rte_distributor_request_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + union rte_distributor_buffer *buf = &d->bufs[worker_id]; + int64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) + | RTE_DISTRIB_GET_BUF; + while (unlikely(buf->bufptr64 & RTE_DISTRIB_FLAGS_MASK)) + rte_pause(); + buf->bufptr64 = req; +} + +struct rte_mbuf * +rte_distributor_poll_pkt(struct rte_distributor *d, + unsigned worker_id) +{ + union rte_distributor_buffer *buf = &d->bufs[worker_id]; + if (buf->bufptr64 & RTE_DISTRIB_GET_BUF) + return NULL; + + /* since bufptr64 is signed, this should be an arithmetic shift */ + int64_t ret = buf->bufptr64 >> RTE_DISTRIB_FLAG_BITS; + return (struct rte_mbuf *)((uintptr_t)ret); +} + +struct rte_mbuf * +rte_distributor_get_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + struct rte_mbuf *ret; + rte_distributor_request_pkt(d, worker_id, oldpkt); + while ((ret = rte_distributor_poll_pkt(d, worker_id)) == NULL) + rte_pause(); + return ret; +} + +int +rte_distributor_return_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt) +{ + union rte_distributor_buffer *buf = &d->bufs[worker_id]; + uint64_t req = (((int64_t)(uintptr_t)oldpkt) << RTE_DISTRIB_FLAG_BITS) + | RTE_DISTRIB_RETURN_BUF; + buf->bufptr64 = req; + return 0; +} + +/**** APIs called on distributor core ***/ + +/* as name suggests, adds a packet to the backlog for a particular worker */ +static int +add_to_backlog(struct rte_distributor_backlog *bl, int64_t item) +{ + if (bl->count == RTE_DISTRIB_BACKLOG_SIZE) + return -1; + + bl->pkts[(bl->start + bl->count++) & (RTE_DISTRIB_BACKLOG_MASK)] + = item; + return 0; +} + +/* takes the next packet for a worker off the backlog */ +static int64_t +backlog_pop(struct rte_distributor_backlog *bl) +{ + bl->count--; + return bl->pkts[bl->start++ & RTE_DISTRIB_BACKLOG_MASK]; +} + +/* stores a packet returned from a worker inside the returns array */ +static inline void +store_return(uintptr_t oldbuf, struct rte_distributor *d, + unsigned *ret_start, unsigned *ret_count) +{ + /* store returns in a circular buffer - code is branch-free */ + d->returns.mbufs[(*ret_start + *ret_count) & RTE_DISTRIB_RETURNS_MASK] + = (void *)oldbuf; + *ret_start += (*ret_count == RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf); + *ret_count += (*ret_count != RTE_DISTRIB_RETURNS_MASK) & !!(oldbuf); +} + +static inline void +handle_worker_shutdown(struct rte_distributor *d, unsigned wkr) +{ + d->in_flight_tags[wkr] = 0; + d->in_flight_bitmask &= ~(1UL << wkr); + d->bufs[wkr].bufptr64 = 0; + if (unlikely(d->backlog[wkr].count != 0)) { + /* On return of a packet, we need to move the + * queued packets for this core elsewhere. + * Easiest solution is to set things up for + * a recursive call. That will cause those + * packets to be queued up for the next free + * core, i.e. it will return as soon as a + * core becomes free to accept the first + * packet, as subsequent ones will be added to + * the backlog for that core. + */ + struct rte_mbuf *pkts[RTE_DISTRIB_BACKLOG_SIZE]; + unsigned i; + struct rte_distributor_backlog *bl = &d->backlog[wkr]; + + for (i = 0; i < bl->count; i++) { + unsigned idx = (bl->start + i) & + RTE_DISTRIB_BACKLOG_MASK; + pkts[i] = (void *)((uintptr_t)(bl->pkts[idx] >> + RTE_DISTRIB_FLAG_BITS)); + } + /* recursive call. + * Note that the tags were set before first level call + * to rte_distributor_process. + */ + rte_distributor_process(d, pkts, i); + bl->count = bl->start = 0; + } +} + +/* this function is called when process() fn is called without any new + * packets. It goes through all the workers and clears any returned packets + * to do a partial flush. + */ +static int +process_returns(struct rte_distributor *d) +{ + unsigned wkr; + unsigned flushed = 0; + unsigned ret_start = d->returns.start, + ret_count = d->returns.count; + + for (wkr = 0; wkr < d->num_workers; wkr++) { + + const int64_t data = d->bufs[wkr].bufptr64; + uintptr_t oldbuf = 0; + + if (data & RTE_DISTRIB_GET_BUF) { + flushed++; + if (d->backlog[wkr].count) + d->bufs[wkr].bufptr64 = + backlog_pop(&d->backlog[wkr]); + else { + d->bufs[wkr].bufptr64 = RTE_DISTRIB_GET_BUF; + d->in_flight_tags[wkr] = 0; + d->in_flight_bitmask &= ~(1UL << wkr); + } + oldbuf = data >> RTE_DISTRIB_FLAG_BITS; + } else if (data & RTE_DISTRIB_RETURN_BUF) { + handle_worker_shutdown(d, wkr); + oldbuf = data >> RTE_DISTRIB_FLAG_BITS; + } + + store_return(oldbuf, d, &ret_start, &ret_count); + } + + d->returns.start = ret_start; + d->returns.count = ret_count; + + return flushed; +} + +/* process a set of packets to distribute them to workers */ +int +rte_distributor_process(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned num_mbufs) +{ + unsigned next_idx = 0; + unsigned wkr = 0; + struct rte_mbuf *next_mb = NULL; + int64_t next_value = 0; + uint32_t new_tag = 0; + unsigned ret_start = d->returns.start, + ret_count = d->returns.count; + + if (unlikely(num_mbufs == 0)) + return process_returns(d); + + while (next_idx < num_mbufs || next_mb != NULL) { + + int64_t data = d->bufs[wkr].bufptr64; + uintptr_t oldbuf = 0; + + if (!next_mb) { + next_mb = mbufs[next_idx++]; + next_value = (((int64_t)(uintptr_t)next_mb) + << RTE_DISTRIB_FLAG_BITS); + /* + * User is advocated to set tag vaue for each + * mbuf before calling rte_distributor_process. + * User defined tags are used to identify flows, + * or sessions. + */ + new_tag = next_mb->hash.usr; + + /* + * Note that if RTE_DISTRIB_MAX_WORKERS is larger than 64 + * then the size of match has to be expanded. + */ + uint64_t match = 0; + unsigned i; + /* + * to scan for a match use "xor" and "not" to get a 0/1 + * value, then use shifting to merge to single "match" + * variable, where a one-bit indicates a match for the + * worker given by the bit-position + */ + for (i = 0; i < d->num_workers; i++) + match |= (!(d->in_flight_tags[i] ^ new_tag) + << i); + + /* Only turned-on bits are considered as match */ + match &= d->in_flight_bitmask; + + if (match) { + next_mb = NULL; + unsigned worker = __builtin_ctzl(match); + if (add_to_backlog(&d->backlog[worker], + next_value) < 0) + next_idx--; + } + } + + if ((data & RTE_DISTRIB_GET_BUF) && + (d->backlog[wkr].count || next_mb)) { + + if (d->backlog[wkr].count) + d->bufs[wkr].bufptr64 = + backlog_pop(&d->backlog[wkr]); + + else { + d->bufs[wkr].bufptr64 = next_value; + d->in_flight_tags[wkr] = new_tag; + d->in_flight_bitmask |= (1UL << wkr); + next_mb = NULL; + } + oldbuf = data >> RTE_DISTRIB_FLAG_BITS; + } else if (data & RTE_DISTRIB_RETURN_BUF) { + handle_worker_shutdown(d, wkr); + oldbuf = data >> RTE_DISTRIB_FLAG_BITS; + } + + /* store returns in a circular buffer */ + store_return(oldbuf, d, &ret_start, &ret_count); + + if (++wkr == d->num_workers) + wkr = 0; + } + /* to finish, check all workers for backlog and schedule work for them + * if they are ready */ + for (wkr = 0; wkr < d->num_workers; wkr++) + if (d->backlog[wkr].count && + (d->bufs[wkr].bufptr64 & RTE_DISTRIB_GET_BUF)) { + + int64_t oldbuf = d->bufs[wkr].bufptr64 >> + RTE_DISTRIB_FLAG_BITS; + store_return(oldbuf, d, &ret_start, &ret_count); + + d->bufs[wkr].bufptr64 = backlog_pop(&d->backlog[wkr]); + } + + d->returns.start = ret_start; + d->returns.count = ret_count; + return num_mbufs; +} + +/* return to the caller, packets returned from workers */ +int +rte_distributor_returned_pkts(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned max_mbufs) +{ + struct rte_distributor_returned_pkts *returns = &d->returns; + unsigned retval = (max_mbufs < returns->count) ? + max_mbufs : returns->count; + unsigned i; + + for (i = 0; i < retval; i++) { + unsigned idx = (returns->start + i) & RTE_DISTRIB_RETURNS_MASK; + mbufs[i] = returns->mbufs[idx]; + } + returns->start += i; + returns->count -= i; + + return retval; +} + +/* return the number of packets in-flight in a distributor, i.e. packets + * being workered on or queued up in a backlog. */ +static inline unsigned +total_outstanding(const struct rte_distributor *d) +{ + unsigned wkr, total_outstanding; + + total_outstanding = __builtin_popcountl(d->in_flight_bitmask); + + for (wkr = 0; wkr < d->num_workers; wkr++) + total_outstanding += d->backlog[wkr].count; + + return total_outstanding; +} + +/* flush the distributor, so that there are no outstanding packets in flight or + * queued up. */ +int +rte_distributor_flush(struct rte_distributor *d) +{ + const unsigned flushed = total_outstanding(d); + + while (total_outstanding(d) > 0) + rte_distributor_process(d, NULL, 0); + + return flushed; +} + +/* clears the internal returns array in the distributor */ +void +rte_distributor_clear_returns(struct rte_distributor *d) +{ + d->returns.start = d->returns.count = 0; +#ifndef __OPTIMIZE__ + memset(d->returns.mbufs, 0, sizeof(d->returns.mbufs)); +#endif +} + +/* creates a distributor instance */ +struct rte_distributor * +rte_distributor_create(const char *name, + unsigned socket_id, + unsigned num_workers) +{ + struct rte_distributor *d; + struct rte_distributor_list *distributor_list; + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(*d) & RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((RTE_DISTRIB_MAX_WORKERS & 7) != 0); + RTE_BUILD_BUG_ON(RTE_DISTRIB_MAX_WORKERS > + sizeof(d->in_flight_bitmask) * CHAR_BIT); + + if (name == NULL || num_workers >= RTE_DISTRIB_MAX_WORKERS) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mz_name, sizeof(mz_name), RTE_DISTRIB_PREFIX"%s", name); + mz = rte_memzone_reserve(mz_name, sizeof(*d), socket_id, NO_FLAGS); + if (mz == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + d = mz->addr; + snprintf(d->name, sizeof(d->name), "%s", name); + d->num_workers = num_workers; + + distributor_list = RTE_TAILQ_CAST(rte_distributor_tailq.head, + rte_distributor_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_INSERT_TAIL(distributor_list, d, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return d; +} diff --git a/lib/librte_distributor/rte_distributor.h b/lib/librte_distributor/rte_distributor.h new file mode 100644 index 00000000..7d36bc8a --- /dev/null +++ b/lib/librte_distributor/rte_distributor.h @@ -0,0 +1,247 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DISTRIBUTE_H_ +#define _RTE_DISTRIBUTE_H_ + +/** + * @file + * RTE distributor + * + * The distributor is a component which is designed to pass packets + * one-at-a-time to workers, with dynamic load balancing. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_DISTRIBUTOR_NAMESIZE 32 /**< Length of name for instance */ + +struct rte_distributor; +struct rte_mbuf; + +/** + * Function to create a new distributor instance + * + * Reserves the memory needed for the distributor operation and + * initializes the distributor to work with the configured number of workers. + * + * @param name + * The name to be given to the distributor instance. + * @param socket_id + * The NUMA node on which the memory is to be allocated + * @param num_workers + * The maximum number of workers that will request packets from this + * distributor + * @return + * The newly created distributor instance + */ +struct rte_distributor * +rte_distributor_create(const char *name, unsigned socket_id, + unsigned num_workers); + +/* *** APIS to be called on the distributor lcore *** */ +/* + * The following APIs are the public APIs which are designed for use on a + * single lcore which acts as the distributor lcore for a given distributor + * instance. These functions cannot be called on multiple cores simultaneously + * without using locking to protect access to the internals of the distributor. + * + * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore + * for the same distributor instance, otherwise deadlock will result. + */ + +/** + * Process a set of packets by distributing them among workers that request + * packets. The distributor will ensure that no two packets that have the + * same flow id, or tag, in the mbuf will be procesed at the same time. + * + * The user is advocated to set tag for each mbuf before calling this function. + * If user doesn't set the tag, the tag value can be various values depending on + * driver implementation and configuration. + * + * This is not multi-thread safe and should only be called on a single lcore. + * + * @param d + * The distributor instance to be used + * @param mbufs + * The mbufs to be distributed + * @param num_mbufs + * The number of mbufs in the mbufs array + * @return + * The number of mbufs processed. + */ +int +rte_distributor_process(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned num_mbufs); + +/** + * Get a set of mbufs that have been returned to the distributor by workers + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + * @param mbufs + * The mbufs pointer array to be filled in + * @param max_mbufs + * The size of the mbufs array + * @return + * The number of mbufs returned in the mbufs array. + */ +int +rte_distributor_returned_pkts(struct rte_distributor *d, + struct rte_mbuf **mbufs, unsigned max_mbufs); + +/** + * Flush the distributor component, so that there are no in-flight or + * backlogged packets awaiting processing + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + * @return + * The number of queued/in-flight packets that were completed by this call. + */ +int +rte_distributor_flush(struct rte_distributor *d); + +/** + * Clears the array of returned packets used as the source for the + * rte_distributor_returned_pkts() API call. + * + * This should only be called on the same lcore as rte_distributor_process() + * + * @param d + * The distributor instance to be used + */ +void +rte_distributor_clear_returns(struct rte_distributor *d); + +/* *** APIS to be called on the worker lcores *** */ +/* + * The following APIs are the public APIs which are designed for use on + * multiple lcores which act as workers for a distributor. Each lcore should use + * a unique worker id when requesting packets. + * + * NOTE: a given lcore cannot act as both a distributor lcore and a worker lcore + * for the same distributor instance, otherwise deadlock will result. + */ + +/** + * API called by a worker to get a new packet to process. Any previous packet + * given to the worker is assumed to have completed processing, and may be + * optionally returned to the distributor via the oldpkt parameter. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * @param oldpkt + * The previous packet, if any, being processed by the worker + * + * @return + * A new packet to be processed by the worker thread. + */ +struct rte_mbuf * +rte_distributor_get_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt); + +/** + * API called by a worker to return a completed packet without requesting a + * new packet, for example, because a worker thread is shutting down + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * @param mbuf + * The previous packet being processed by the worker + */ +int +rte_distributor_return_pkt(struct rte_distributor *d, unsigned worker_id, + struct rte_mbuf *mbuf); + +/** + * API called by a worker to request a new packet to process. + * Any previous packet given to the worker is assumed to have completed + * processing, and may be optionally returned to the distributor via + * the oldpkt parameter. + * Unlike rte_distributor_get_pkt(), this function does not wait for a new + * packet to be provided by the distributor. + * + * NOTE: after calling this function, rte_distributor_poll_pkt() should + * be used to poll for the packet requested. The rte_distributor_get_pkt() + * API should *not* be used to try and retrieve the new packet. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * @param oldpkt + * The previous packet, if any, being processed by the worker + */ +void +rte_distributor_request_pkt(struct rte_distributor *d, + unsigned worker_id, struct rte_mbuf *oldpkt); + +/** + * API called by a worker to check for a new packet that was previously + * requested by a call to rte_distributor_request_pkt(). It does not wait + * for the new packet to be available, but returns NULL if the request has + * not yet been fulfilled by the distributor. + * + * @param d + * The distributor instance to be used + * @param worker_id + * The worker instance number to use - must be less that num_workers passed + * at distributor creation time. + * + * @return + * A new packet to be processed by the worker thread, or NULL if no + * packet is yet available. + */ +struct rte_mbuf * +rte_distributor_poll_pkt(struct rte_distributor *d, + unsigned worker_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_distributor/rte_distributor_version.map b/lib/librte_distributor/rte_distributor_version.map new file mode 100644 index 00000000..73fdc437 --- /dev/null +++ b/lib/librte_distributor/rte_distributor_version.map @@ -0,0 +1,15 @@ +DPDK_2.0 { + global: + + rte_distributor_clear_returns; + rte_distributor_create; + rte_distributor_flush; + rte_distributor_get_pkt; + rte_distributor_poll_pkt; + rte_distributor_process; + rte_distributor_request_pkt; + rte_distributor_return_pkt; + rte_distributor_returned_pkts; + + local: *; +}; diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile new file mode 100644 index 00000000..cf11a099 --- /dev/null +++ b/lib/librte_eal/Makefile @@ -0,0 +1,38 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-y += common +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile new file mode 100644 index 00000000..0e6e2be9 --- /dev/null +++ b/lib/librte_eal/bsdapp/Makefile @@ -0,0 +1,38 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile new file mode 100644 index 00000000..f64374c6 --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/BSDmakefile @@ -0,0 +1,36 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +KMOD= contigmem +SRCS= contigmem.c device_if.h bus_if.h + +.include diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile new file mode 100644 index 00000000..bab005fd --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = contigmem + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := contigmem.c + +include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c new file mode 100644 index 00000000..c6ca3b9c --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c @@ -0,0 +1,232 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +static int contigmem_load(void); +static int contigmem_unload(void); +static int contigmem_physaddr(SYSCTL_HANDLER_ARGS); + +static d_mmap_t contigmem_mmap; +static d_mmap_single_t contigmem_mmap_single; +static d_open_t contigmem_open; + +static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; +static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; + +static eventhandler_tag contigmem_eh_tag; +static void *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; +static struct cdev *contigmem_cdev = NULL; + +TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers); +TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size); + +static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem"); + +SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD, + &contigmem_num_buffers, 0, "Number of contigmem buffers allocated"); +SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD, + &contigmem_buffer_size, 0, "Size of each contiguous buffer"); + +static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0, + "physaddr"); + +MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations"); + +static int contigmem_modevent(module_t mod, int type, void *arg) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + error = contigmem_load(); + break; + case MOD_UNLOAD: + error = contigmem_unload(); + break; + default: + break; + } + + return error; +} + +moduledata_t contigmem_mod = { + "contigmem", + (modeventhand_t)contigmem_modevent, + 0 +}; + +DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); +MODULE_VERSION(contigmem, 1); + +static struct cdevsw contigmem_ops = { + .d_name = "contigmem", + .d_version = D_VERSION, + .d_mmap = contigmem_mmap, + .d_mmap_single = contigmem_mmap_single, + .d_open = contigmem_open, +}; + +static int +contigmem_load() +{ + char index_string[8], description[32]; + int i; + + if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) { + printf("%d buffers requested is greater than %d allowed\n", + contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS); + return EINVAL; + } + + if (contigmem_buffer_size < PAGE_SIZE || + (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) { + printf("buffer size 0x%lx is not greater than PAGE_SIZE and " + "power of two\n", contigmem_buffer_size); + return EINVAL; + } + + for (i = 0; i < contigmem_num_buffers; i++) { + contigmem_buffers[i] = + contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0, + BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); + + if (contigmem_buffers[i] == NULL) { + printf("contigmalloc failed for buffer %d\n", i); + return ENOMEM; + } + + printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i], + (void *)pmap_kextract((vm_offset_t)contigmem_buffers[i])); + + snprintf(index_string, sizeof(index_string), "%d", i); + snprintf(description, sizeof(description), + "phys addr for buffer %d", i); + SYSCTL_ADD_PROC(NULL, + &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO, + index_string, CTLTYPE_U64 | CTLFLAG_RD, + (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU", + description); + } + + contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT, + GID_WHEEL, 0600, "contigmem"); + + return 0; +} + +static int +contigmem_unload() +{ + int i; + + if (contigmem_cdev != NULL) + destroy_dev(contigmem_cdev); + + if (contigmem_eh_tag != NULL) + EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag); + + for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) + if (contigmem_buffers[i] != NULL) + contigfree(contigmem_buffers[i], contigmem_buffer_size, + M_CONTIGMEM); + + return 0; +} + +static int +contigmem_physaddr(SYSCTL_HANDLER_ARGS) +{ + uint64_t physaddr; + int index = (int)(uintptr_t)arg1; + + physaddr = (uint64_t)vtophys(contigmem_buffers[index]); + return sysctl_handle_64(oidp, &physaddr, 0, req); +} + +static int +contigmem_open(struct cdev *cdev, int fflags, int devtype, + struct thread *td) +{ + return 0; +} + +static int +contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int prot, vm_memattr_t *memattr) +{ + + *paddr = offset; + return 0; +} + +static int +contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, + struct vm_object **obj, int nprot) +{ + /* + * The buffer index is encoded in the offset. Divide the offset by + * PAGE_SIZE to get the index of the buffer requested by the user + * app. + */ + if ((*offset/PAGE_SIZE) >= contigmem_num_buffers) + return EINVAL; + + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]); + *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, + curthread->td_ucred); + + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile new file mode 100644 index 00000000..9054ad61 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -0,0 +1,117 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_eal.a + +ARCH_DIR ?= $(RTE_ARCH) +VPATH += $(RTE_SDK)/lib/librte_eal/common +VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) + +CFLAGS += -I$(SRCDIR)/include +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +CFLAGS += -I$(RTE_SDK)/lib/librte_ring +CFLAGS += -I$(RTE_SDK)/lib/librte_mempool +CFLAGS += $(WERROR_FLAGS) -O3 + +LDLIBS += -lexecinfo +LDLIBS += -lpthread +LDLIBS += -lgcc_s + +EXPORT_MAP := rte_eal_version.map + +LIBABIVER := 2 + +# specific to linuxapp exec-env +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c + +# from common dir +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c + +# from arch dir +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c + +CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) + +CFLAGS_eal.o := -D_GNU_SOURCE +#CFLAGS_eal_thread.o := -D_GNU_SOURCE +CFLAGS_eal_log.o := -D_GNU_SOURCE +CFLAGS_eal_common_log.o := -D_GNU_SOURCE + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_eal_thread.o += -Wno-return-type +CFLAGS_eal_hpet.o += -Wno-return-type +endif + +INC := rte_interrupts.h + +SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \ + $(addprefix include/exec-env/,$(INC)) + +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c new file mode 100644 index 00000000..06bfd4e0 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -0,0 +1,638 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" +#include "eal_options.h" + +#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) + +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; +/* early configuration structure, when memory config is not mmapped */ +static struct rte_mem_config early_mem_config; + +/* define fd variable here, because file needs to be kept open for the + * duration of the program, as we hold a write lock on it in the primary proc */ +static int mem_cfg_fd = -1; + +static struct flock wr_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = offsetof(struct rte_mem_config, memseg), + .l_len = sizeof(early_mem_config.memseg), +}; + +/* Address of global and public configuration */ +static struct rte_config rte_config = { + .mem_config = &early_mem_config, +}; + +/* internal configuration (per-core) */ +struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/* internal configuration */ +struct internal_config internal_config; + +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return a pointer to the configuration structure */ +struct rte_config * +rte_eal_get_configuration(void) +{ + return &rte_config; +} + +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + +/* create memory configuration in shared/mmap memory. Take out + * a write lock on the memsegs, so we can auto-detect primary/secondary. + * This means we never close the file while running (auto-close on exit). + * We also don't lock the whole file, so that in future we can use read-locks + * on other parts, e.g. memzones, to detect if there are running secondary + * processes. */ +static void +rte_eal_config_create(void) +{ + void *rte_mem_cfg_addr; + int retval; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + if (retval < 0){ + close(mem_cfg_fd); + rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); + } + + retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); + if (retval < 0){ + close(mem_cfg_fd); + rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary " + "process running?\n", pathname); + } + + rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + + if (rte_mem_cfg_addr == MAP_FAILED){ + rte_panic("Cannot mmap memory for rte_config\n"); + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; +} + +/* attach to an existing shared memory config */ +static void +rte_eal_config_attach(void) +{ + void *rte_mem_cfg_addr; + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + close(mem_cfg_fd); + if (rte_mem_cfg_addr == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; +} + +/* Detect if we are a primary or a secondary process */ +enum rte_proc_type_t +eal_proc_type_detect(void) +{ + enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; + const char *pathname = eal_runtime_config_path(); + + /* if we can open the file but not get a write-lock we are a secondary + * process. NOTE: if we get a file handle back, we keep that open + * and don't close it to prevent a race condition between multiple opens */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + + RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", + ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); + + return ptype; +} + +/* Sets up rte_config structure with the pointer to shared memory config.*/ +static void +rte_config_init(void) +{ + rte_config.process_type = internal_config.process_type; + + switch (rte_config.process_type){ + case RTE_PROC_PRIMARY: + rte_eal_config_create(); + break; + case RTE_PROC_SECONDARY: + rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + break; + case RTE_PROC_AUTO: + case RTE_PROC_INVALID: + rte_panic("Invalid process type\n"); + } +} + +/* display usage */ +static void +eal_usage(const char *prgname) +{ + printf("\nUsage: %s ", prgname); + eal_common_usage(); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; +} + +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + const int old_optreset = optreset; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + optreset = 1; + + eal_reset_internal_config(&internal_config); + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? + eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; + } + + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optreset = old_optreset; + optarg = old_optarg; +} + +/* Parse the argument given in the command line of the application */ +static int +eal_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const int old_optind = optind; + const int old_optopt = optopt; + const int old_optreset = optreset; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + optreset = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; + + switch (opt) { + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on FreeBSD\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on FreeBSD\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on FreeBSD\n", opt); + } + eal_usage(prgname); + ret = -1; + goto out; + } + } + + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; + } + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + + if (optind >= 0) + argv[optind-1] = prgname; + ret = optind-1; + +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optreset = old_optreset; + optarg = old_optarg; + + return ret; +} + +static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + +/* return non-zero if hugepages are enabled. */ +int rte_eal_has_hugepages(void) +{ + return !internal_config.no_hugetlbfs; +} + +/* Abstraction for port I/0 privilege */ +int +rte_eal_iopl_init(void) +{ + static int fd; + + fd = open("/dev/io", O_RDWR); + if (fd < 0) + return -1; + /* keep fd open for iopl */ + return 0; +} + +/* Launch threads, called at application init(). */ +int +rte_eal_init(int argc, char **argv) +{ + int i, fctret, ret; + pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; + + thread_id = pthread_self(); + + if (rte_eal_log_early_init() < 0) + rte_panic("Cannot init early logs\n"); + + eal_log_level_parse(argc, argv); + + /* set log level as early as possible */ + rte_set_log_level(internal_config.log_level); + + if (rte_eal_cpu_init() < 0) + rte_panic("Cannot detect lcores\n"); + + fctret = eal_parse_args(argc, argv); + if (fctret < 0) + exit(1); + + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + eal_hugepage_info_init() < 0) + rte_panic("Cannot get hugepage information\n"); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { + if (internal_config.no_hugetlbfs) + internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; + else + internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + + rte_srand(rte_rdtsc()); + + rte_config_init(); + + if (rte_eal_memory_init() < 0) + rte_panic("Cannot init memory\n"); + + if (rte_eal_memzone_init() < 0) + rte_panic("Cannot init memzone\n"); + + if (rte_eal_tailqs_init() < 0) + rte_panic("Cannot init tail queues for objects\n"); + +/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n");*/ + + if (rte_eal_alarm_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_intr_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); + + if (rte_eal_pci_init() < 0) + rte_panic("Cannot init PCI\n"); + + eal_check_mem_on_local_socket(); + + if (eal_plugins_init() < 0) + rte_panic("Cannot init plugins\n"); + + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n", + rte_config.master_lcore, thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + + RTE_LCORE_FOREACH_SLAVE(i) { + + /* + * create communication pipes between master thread + * and children + */ + if (pipe(lcore_config[i].pipe_master2slave) < 0) + rte_panic("Cannot create pipe\n"); + if (pipe(lcore_config[i].pipe_slave2master) < 0) + rte_panic("Cannot create pipe\n"); + + lcore_config[i].state = WAIT; + + /* create a thread for each lcore */ + ret = pthread_create(&lcore_config[i].thread_id, NULL, + eal_thread_loop, NULL); + if (ret != 0) + rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + pthread_set_name_np(lcore_config[i].thread_id, thread_name); + } + + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* Probe & Initialize PCI devices */ + if (rte_eal_pci_probe()) + rte_panic("Cannot probe PCI\n"); + + rte_eal_mcfg_complete(); + + return fctret; +} + +/* get core role */ +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned lcore_id) +{ + return rte_config.lcore_role[lcore_id]; +} + +enum rte_proc_type_t +rte_eal_process_type(void) +{ + return rte_config.process_type; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c new file mode 100644 index 00000000..204df85d --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c @@ -0,0 +1,60 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include "eal_private.h" + +int +rte_eal_alarm_init(void) +{ + return 0; +} + + +int +rte_eal_alarm_set(uint64_t us __rte_unused, + rte_eal_alarm_callback cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c new file mode 100644 index 00000000..907fbfa7 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_debug.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BACKTRACE_SIZE 256 + +/* dump the stack of the calling core */ +void rte_dump_stack(void) +{ + void *func[BACKTRACE_SIZE]; + char **symb = NULL; + int size; + + size = backtrace(func, BACKTRACE_SIZE); + symb = backtrace_symbols(func, size); + + if (symb == NULL) + return; + + while (size > 0) { + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, + "%d: [%s]\n", size, symb[size - 1]); + size --; + } + + free(symb); +} + +/* not implemented in this environment */ +void rte_dump_registers(void) +{ + return; +} + +/* call abort(), it will generate a coredump if enabled */ +void __rte_panic(const char *funcname, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + rte_dump_stack(); + rte_dump_registers(); + abort(); +} + +/* + * Like rte_panic this terminates the application. However, no traceback is + * provided and no core-dump is generated. + */ +void +rte_exit(int exit_code, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + if (exit_code != 0) + RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" + " Cause: ", exit_code); + + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + +#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR + exit(exit_code); +#else + rte_dump_stack(); + rte_dump_registers(); + abort(); +#endif +} diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c new file mode 100644 index 00000000..8a33c30c --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -0,0 +1,134 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include + +#include +#include +#include "eal_hugepages.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" + +#define CONTIGMEM_DEV "/dev/contigmem" + +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + void *retval; + int fd = open(filename, O_CREAT | O_RDWR, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +/* + * No hugepage support on freebsd, but we dummy it, using contigmem driver + */ +int +eal_hugepage_info_init(void) +{ + size_t sysctl_size; + int num_buffers, fd, error; + int64_t buffer_size; + /* re-use the linux "internal config" structure for our memory data */ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + sysctl_size = sizeof(num_buffers); + error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers, + &sysctl_size, NULL, 0); + + if (error != 0) { + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers"); + return -1; + } + + sysctl_size = sizeof(buffer_size); + error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size, + &sysctl_size, NULL, 0); + + if (error != 0) { + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size"); + return -1; + } + + fd = open(CONTIGMEM_DEV, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n"); + return -1; + } + + if (buffer_size >= 1<<30) + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n", + num_buffers, (int)(buffer_size>>30)); + else if (buffer_size >= 1<<20) + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dMB\n", + num_buffers, (int)(buffer_size>>20)); + else + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", + num_buffers, (int)(buffer_size>>10)); + + internal_config.num_hugepage_sizes = 1; + hpi->hugedir = CONTIGMEM_DEV; + hpi->hugepage_sz = buffer_size; + hpi->num_pages[0] = num_buffers; + hpi->lock_descriptor = fd; + + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), + sizeof(struct hugepage_info)); + if (tmp_hpi == NULL ) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + return -1; + } + + memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info)); + + if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c new file mode 100644 index 00000000..836e4836 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "eal_private.h" + +int +rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, + rte_intr_callback_fn cb __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, + rte_intr_callback_fn cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_eal_intr_init(void) +{ + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data) +{ + RTE_SET_USED(intr_handle); + RTE_SET_USED(epfd); + RTE_SET_USED(op); + RTE_SET_USED(vec); + RTE_SET_USED(data); + + return -ENOTSUP; +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + RTE_SET_USED(intr_handle); + RTE_SET_USED(nb_efd); + + return 0; +} + +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 0; +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 1; +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c new file mode 100644 index 00000000..b8bfafde --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +/* No topology information available on FreeBSD including NUMA info */ +unsigned +eal_cpu_core_id(__rte_unused unsigned lcore_id) +{ + return 0; +} + +static int +eal_get_ncpus(void) +{ + int mib[2] = {CTL_HW, HW_NCPU}; + int ncpu; + size_t len = sizeof(ncpu); + + sysctl(mib, 2, &ncpu, &len, NULL, 0); + RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + return ncpu; +} + +unsigned +eal_cpu_socket_id(__rte_unused unsigned cpu_id) +{ + return 0; +} + +/* Check if a cpu is present by the presence of the + * cpu information for it. + */ +int +eal_cpu_detected(unsigned lcore_id) +{ + const unsigned ncpus = eal_get_ncpus(); + return lcore_id < ncpus; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_eal/bsdapp/eal/eal_log.c new file mode 100644 index 00000000..a425f7a8 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_log.c @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include + +/* + * set the log to default function, called during eal init process, + * once memzones are available. + */ +int +rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused) +{ + if (rte_eal_common_log_init(stderr) < 0) + return -1; + return 0; +} + +int +rte_eal_log_early_init(void) +{ + rte_openlog_stream(stderr); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c new file mode 100644 index 00000000..3614da8d --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -0,0 +1,194 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" + +#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) + +/* + * Get physical address of any mapped virtual address in the current process. + */ +phys_addr_t +rte_mem_virt2phy(const void *virtaddr) +{ + /* XXX not implemented. This function is only used by + * rte_mempool_virt2phy() when hugepages are disabled. */ + (void)virtaddr; + return RTE_BAD_PHYS_ADDR; +} + +int +rte_eal_hugepage_init(void) +{ + struct rte_mem_config *mcfg; + uint64_t total_mem = 0; + void *addr; + unsigned i, j, seg_idx = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* for debug purposes, hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { + addr = malloc(internal_config.memory); + mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].addr = addr; + mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].len = internal_config.memory; + mcfg->memseg[0].socket_id = 0; + return 0; + } + + /* map all hugepages and sort them */ + for (i = 0; i < internal_config.num_hugepage_sizes; i ++){ + struct hugepage_info *hpi; + + hpi = &internal_config.hugepage_info[i]; + for (j = 0; j < hpi->num_pages[0]; j++) { + struct rte_memseg *seg; + uint64_t physaddr; + int error; + size_t sysctl_size = sizeof(physaddr); + char physaddr_str[64]; + + addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE, + MAP_SHARED, hpi->lock_descriptor, + j * EAL_PAGE_SIZE); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + j, hpi->hugedir); + return -1; + } + + snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem" + ".physaddr.%d", j); + error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size, + NULL, 0); + if (error < 0) { + RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " + "from %s\n", j, hpi->hugedir); + return -1; + } + + seg = &mcfg->memseg[seg_idx++]; + seg->addr = addr; + seg->phys_addr = physaddr; + seg->hugepage_sz = hpi->hugepage_sz; + seg->len = hpi->hugepage_sz; + seg->nchannel = mcfg->nchannel; + seg->nrank = mcfg->nrank; + seg->socket_id = 0; + + RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" + PRIx64", len %zu\n", + seg_idx, addr, physaddr, hpi->hugepage_sz); + if (total_mem >= internal_config.memory || + seg_idx >= RTE_MAX_MEMSEG) + break; + } + } + return 0; +} + +int +rte_eal_hugepage_attach(void) +{ + const struct hugepage_info *hpi; + int fd_hugepage_info, fd_hugepage = -1; + unsigned i = 0; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* Obtain a file descriptor for hugepage_info */ + fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY); + if (fd_hugepage_info < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + return -1; + } + + /* Map the shared hugepage_info into the process address spaces */ + hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE, + fd_hugepage_info, 0); + if (hpi == NULL) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + goto error; + } + + /* Obtain a file descriptor for contiguous memory */ + fd_hugepage = open(hpi->hugedir, O_RDWR); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir); + goto error; + } + + /* Map the contiguous memory into each memory segment */ + for (i = 0; i < hpi->num_pages[0]; i++) { + + void *addr; + struct rte_memseg *seg = &mcfg->memseg[i]; + + addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FIXED, fd_hugepage, + i * EAL_PAGE_SIZE); + if (addr == MAP_FAILED || addr != seg->addr) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + i, hpi->hugedir); + goto error; + } + + } + + /* hugepage_info is no longer required */ + munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info)); + close(fd_hugepage_info); + close(fd_hugepage); + return 0; + +error: + if (fd_hugepage_info >= 0) + close(fd_hugepage_info); + if (fd_hugepage >= 0) + close(fd_hugepage); + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c new file mode 100644 index 00000000..2d16d782 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -0,0 +1,633 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(RTE_ARCH_X86) +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in + * sysfs. Moreover, when a registered driver matches a device, the + * kernel driver currently using it is unloaded and replaced by + * igb_uio module, which is a very minimal userland driver for Intel + * network card, only providing access to PCI BAR to applications, and + * enabling bus master. + */ + +/* unbind kernel driver for this device */ +int +pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) +{ + RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " + "for BSD\n"); + return -ENOTSUP; +} + +/* Map pci device */ +int +rte_eal_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_eal_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + struct rte_pci_addr *loc; + + loc = &dev->addr; + + snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + if (access(devname, O_RDWR) < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char *devname; + void *mapaddr; + uint64_t offset; + uint64_t pagesz; + struct pci_map *maps; + + maps = uio_res->maps; + devname = uio_res->path; + pagesz = sysconf(_SC_PAGESIZE); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* if matching map is found, then use it */ + offset = res_idx * pagesz; + mapaddr = pci_map_resource(NULL, fd, (off_t)offset, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = offset; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +static int +pci_scan_one(int dev_pci_fd, struct pci_conf *conf) +{ + struct rte_pci_device *dev; + struct pci_bar_io bar; + unsigned i, max; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) { + return -1; + } + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = conf->pc_sel.pc_domain; + dev->addr.bus = conf->pc_sel.pc_bus; + dev->addr.devid = conf->pc_sel.pc_dev; + dev->addr.function = conf->pc_sel.pc_func; + + /* get vendor id */ + dev->id.vendor_id = conf->pc_vendor; + + /* get device id */ + dev->id.device_id = conf->pc_device; + + /* get subsystem_vendor id */ + dev->id.subsystem_vendor_id = conf->pc_subvendor; + + /* get subsystem_device id */ + dev->id.subsystem_device_id = conf->pc_subdevice; + + /* TODO: get max_vfs */ + dev->max_vfs = 0; + + /* FreeBSD has no NUMA support (yet) */ + dev->numa_node = 0; + + /* FreeBSD has only one pass through driver */ + dev->kdrv = RTE_KDRV_NIC_UIO; + + /* parse resources */ + switch (conf->pc_hdr & PCIM_HDRTYPE) { + case PCIM_HDRTYPE_NORMAL: + max = PCIR_MAX_BAR_0; + break; + case PCIM_HDRTYPE_BRIDGE: + max = PCIR_MAX_BAR_1; + break; + case PCIM_HDRTYPE_CARDBUS: + max = PCIR_MAX_BAR_2; + break; + default: + goto skipdev; + } + + for (i = 0; i <= max; i++) { + bar.pbi_sel = conf->pc_sel; + bar.pbi_reg = PCIR_BAR(i); + if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) + continue; + + dev->mem_resource[i].len = bar.pbi_length; + if (PCI_BAR_IO(bar.pbi_base)) { + dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); + continue; + } + dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); + } + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&pci_device_list)) { + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + else { + struct rte_pci_device *dev2 = NULL; + int ret; + + TAILQ_FOREACH(dev2, &pci_device_list, next) { + ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + else if (ret < 0) { + TAILQ_INSERT_BEFORE(dev2, dev, next); + return 0; + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + return 0; + } + } + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + + return 0; + +skipdev: + free(dev); + return 0; +} + +/* + * Scan the content of the PCI bus, and add the devices in the devices + * list. Call pci_scan_one() for each pci entry found. + */ +int +rte_eal_pci_scan(void) +{ + int fd; + unsigned dev_count = 0; + struct pci_conf matches[16]; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 0, + .patterns = NULL, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + do { + unsigned i; + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + for (i = 0; i < conf_io.num_matches; i++) + if (pci_scan_one(fd, &matches[i]) < 0) + goto error; + + dev_count += conf_io.num_matches; + } while(conf_io.status == PCI_GETCONF_MORE_DEVS); + + close(fd); + + RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count); + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Read PCI config space. */ +int rte_eal_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) +{ + int fd = -1; + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + close(fd); + + memcpy(buf, &pi.pi_data, len); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Write PCI config space. */ +int rte_eal_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) +{ + int fd = -1; + + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_data = *(const uint32_t *)buf, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + memcpy(&pi.pi_data, buf, len); + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCWRITE, &pi) < 0) + goto error; + + close(fd); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +int +rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret; + + switch (dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { + p->base = (uintptr_t)dev->mem_resource[bar].addr; + ret = 0; + } else + ret = -1; + break; +#endif + default: + ret = -1; + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +static void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + default: + break; + } +} + +static void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl(*(const uint32_t *)s, reg); + } else if (len >= 2) { + size = 2; + outw(*(const uint16_t *)s, reg); + } else { + size = 1; + outb(*s, reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + default: + break; + } +} + +int +rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret; + + switch (p->dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + ret = 0; + break; +#endif + default: + ret = -1; + break; + } + + return ret; +} + +/* Init the PCI EAL subsystem */ +int +rte_eal_pci_init(void) +{ + TAILQ_INIT(&pci_driver_list); + TAILQ_INIT(&pci_device_list); + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + + if (rte_eal_pci_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c new file mode 100644 index 00000000..9a034373 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; +RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; +RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); + +/* + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switch in FINISHED state. + */ +int +rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id) +{ + int n; + char c = 0; + int m2s = lcore_config[slave_id].pipe_master2slave[1]; + int s2m = lcore_config[slave_id].pipe_slave2master[0]; + + if (lcore_config[slave_id].state != WAIT) + return -EBUSY; + + lcore_config[slave_id].f = f; + lcore_config[slave_id].arg = arg; + + /* send message */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(m2s, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + /* wait ack */ + do { + n = read(s2m, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + return 0; +} + +/* set affinity for current thread */ +static int +eal_thread_set_affinity(void) +{ + unsigned lcore_id = rte_lcore_id(); + + /* acquire system unique id */ + rte_gettid(); + + /* update EAL thread core affinity */ + return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset); +} + +void eal_thread_init_master(unsigned lcore_id) +{ + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); +} + +/* main loop of threads */ +__attribute__((noreturn)) void * +eal_thread_loop(__attribute__((unused)) void *arg) +{ + char c; + int n, ret; + unsigned lcore_id; + pthread_t thread_id; + int m2s, s2m; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + + thread_id = pthread_self(); + + /* retrieve our lcore_id from the configuration structure */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (thread_id == lcore_config[lcore_id].thread_id) + break; + } + if (lcore_id == RTE_MAX_LCORE) + rte_panic("cannot retrieve lcore id\n"); + + m2s = lcore_config[lcore_id].pipe_master2slave[0]; + s2m = lcore_config[lcore_id].pipe_slave2master[1]; + + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n", + lcore_id, thread_id, cpuset, ret == 0 ? "" : "..."); + + /* read on our pipe to get commands */ + while (1) { + void *fct_arg; + + /* wait command */ + do { + n = read(m2s, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + lcore_config[lcore_id].state = RUNNING; + + /* send ack */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(s2m, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + if (lcore_config[lcore_id].f == NULL) + rte_panic("NULL function pointer\n"); + + /* call the function and store the return value */ + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; + rte_wmb(); + lcore_config[lcore_id].state = FINISHED; + } + + /* never reached */ + /* pthread_exit(NULL); */ + /* return NULL; */ +} + +/* require calling thread tid by gettid() */ +int rte_sys_gettid(void) +{ + long lwpid; + thr_self(&lwpid); + return (int)lwpid; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c new file mode 100644 index 00000000..f12d9bd2 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_timer.c @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +#ifdef RTE_LIBEAL_USE_HPET +#warning HPET is not supported in FreeBSD +#endif + +enum timer_source eal_timer_source = EAL_TIMER_TSC; + +uint64_t +get_tsc_freq(void) +{ + size_t sz; + int tmp; + uint64_t tsc_hz; + + sz = sizeof(tmp); + tmp = 0; + + if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0)) + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + else if (tmp != 1) + RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n"); + + tmp = 0; + + if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0)) + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + else if (tmp != 1) + RTE_LOG(WARNING, EAL, "TSC is not invariant\n"); + + sz = sizeof(tsc_hz); + if (sysctlbyname("machdep.tsc_freq", &tsc_hz, &sz, NULL, 0)) { + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + return 0; + } + + return tsc_hz; +} + +int +rte_eal_timer_init(void) +{ + set_tsc_freq(); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 00000000..99a33432 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. + */ +struct memblock_info { + uint8_t exchange_flag; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h new file mode 100644 index 00000000..c1995ee1 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h @@ -0,0 +1,137 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic " +#endif + +#ifndef _RTE_BSDAPP_INTERRUPTS_H_ +#define _RTE_BSDAPP_INTERRUPTS_H_ + +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 + +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_MAX +}; + +/** Handle for interrupts. */ +struct rte_intr_handle { + int fd; /**< file descriptor */ + int uio_cfg_fd; /**< UIO config file descriptor */ + enum rte_intr_handle_type type; /**< handle type */ + int max_intr; /**< max interrupt requested */ + uint32_t nb_efd; /**< number of available efds */ + int *intr_vec; /**< intr vector number array */ +}; + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It enables the fastpath event fds if it's necessary. + * It creates event fds when multi-vectors allowed, + * otherwise it multiplexes the single event fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disable the fastpath event fds. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The fastpath interrupt is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other cause or not. + * Other cause stands for none fastpath interrupt. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if no multiple interrupt vector support. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map new file mode 100644 index 00000000..58c2951e --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -0,0 +1,153 @@ +DPDK_2.0 { + global: + + __rte_panic; + devargs_list; + eal_parse_sysfs_value; + eal_timer_source; + lcore_config; + pci_device_list; + pci_driver_list; + per_lcore__lcore_id; + per_lcore__rte_errno; + rte_calloc; + rte_calloc_socket; + rte_cpu_check_supported; + rte_cpu_get_flag_enabled; + rte_cycles_vmware_tsc_map; + rte_delay_us; + rte_dump_physmem_layout; + rte_dump_registers; + rte_dump_stack; + rte_dump_tailq; + rte_eal_alarm_cancel; + rte_eal_alarm_set; + rte_eal_dev_init; + rte_eal_devargs_add; + rte_eal_devargs_dump; + rte_eal_devargs_type_count; + rte_eal_driver_register; + rte_eal_driver_unregister; + rte_eal_get_configuration; + rte_eal_get_lcore_state; + rte_eal_get_physmem_layout; + rte_eal_get_physmem_size; + rte_eal_has_hugepages; + rte_eal_hpet_init; + rte_eal_init; + rte_eal_iopl_init; + rte_eal_lcore_role; + rte_eal_mp_remote_launch; + rte_eal_mp_wait_lcore; + rte_eal_parse_devargs_str; + rte_eal_pci_dump; + rte_eal_pci_probe; + rte_eal_pci_probe_one; + rte_eal_pci_register; + rte_eal_pci_scan; + rte_eal_pci_unregister; + rte_eal_process_type; + rte_eal_remote_launch; + rte_eal_tailq_lookup; + rte_eal_tailq_register; + rte_eal_vdev_init; + rte_eal_vdev_uninit; + rte_eal_wait_lcore; + rte_exit; + rte_free; + rte_get_hpet_cycles; + rte_get_hpet_hz; + rte_get_log_level; + rte_get_log_type; + rte_get_tsc_hz; + rte_hexdump; + rte_intr_callback_register; + rte_intr_callback_unregister; + rte_intr_disable; + rte_intr_enable; + rte_log; + rte_log_add_in_history; + rte_log_cur_msg_loglevel; + rte_log_cur_msg_logtype; + rte_log_dump_history; + rte_log_set_history; + rte_logs; + rte_malloc; + rte_malloc_dump_stats; + rte_malloc_get_socket_stats; + rte_malloc_set_limit; + rte_malloc_socket; + rte_malloc_validate; + rte_malloc_virt2phy; + rte_mem_lock_page; + rte_mem_phy2mch; + rte_mem_virt2phy; + rte_memdump; + rte_memory_get_nchannel; + rte_memory_get_nrank; + rte_memzone_dump; + rte_memzone_lookup; + rte_memzone_reserve; + rte_memzone_reserve_aligned; + rte_memzone_reserve_bounded; + rte_memzone_walk; + rte_openlog_stream; + rte_realloc; + rte_set_application_usage_hook; + rte_set_log_level; + rte_set_log_type; + rte_socket_id; + rte_strerror; + rte_strsplit; + rte_sys_gettid; + rte_thread_get_affinity; + rte_thread_set_affinity; + rte_vlog; + rte_xen_dom0_memory_attach; + rte_xen_dom0_memory_init; + rte_zmalloc; + rte_zmalloc_socket; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_eal_pci_detach; + rte_eal_pci_read_config; + rte_eal_pci_write_config; + rte_intr_allow_others; + rte_intr_dp_is_en; + rte_intr_efd_disable; + rte_intr_efd_enable; + rte_intr_rx_ctl; + rte_memzone_free; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_intr_cap_multiple; + rte_keepalive_create; + rte_keepalive_dispatch_pings; + rte_keepalive_mark_alive; + rte_keepalive_register_core; + rte_xen_dom0_supported; + +} DPDK_2.1; + +DPDK_16.04 { + global: + + rte_cpu_get_flag_name; + rte_eal_pci_ioport_map; + rte_eal_pci_ioport_read; + rte_eal_pci_ioport_unmap; + rte_eal_pci_ioport_write; + rte_eal_pci_map_device; + rte_eal_pci_unmap_device; + rte_eal_primary_proc_alive; + +} DPDK_2.2; diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile new file mode 100644 index 00000000..5454ed85 --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile @@ -0,0 +1,36 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +KMOD= nic_uio +SRCS= nic_uio.c device_if.h bus_if.h pci_if.h + +.include diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile new file mode 100644 index 00000000..89957615 --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = nic_uio + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := nic_uio.c + +include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c new file mode 100644 index 00000000..99a4975c --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c @@ -0,0 +1,335 @@ +/* - + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +__FBSDID("$FreeBSD$"); + +#include /* defines used in kernel.h */ +#include +#include /* types used in module initialization */ +#include /* cdevsw struct */ +#include /* structs, prototypes for pci bus stuff and DEVMETHOD */ +#include +#include +#include +#include + +#include +#include /* For pci_get macros! */ +#include /* The softc holds our per-instance data. */ +#include +#include +#include +#include +#include + + +#define MAX_BARS (PCIR_MAX_BAR_0 + 1) + +#define MAX_DETACHED_DEVICES 128 +static device_t detached_devices[MAX_DETACHED_DEVICES] = {}; +static int num_detached = 0; + +struct nic_uio_softc { + device_t dev_t; + struct cdev *my_cdev; + int bar_id[MAX_BARS]; + struct resource *bar_res[MAX_BARS]; + u_long bar_start[MAX_BARS]; + u_long bar_size[MAX_BARS]; +}; + +/* Function prototypes */ +static d_open_t nic_uio_open; +static d_close_t nic_uio_close; +static d_mmap_t nic_uio_mmap; +static d_mmap_single_t nic_uio_mmap_single; +static int nic_uio_probe(device_t dev); +static int nic_uio_attach(device_t dev); +static int nic_uio_detach(device_t dev); +static int nic_uio_shutdown(void); +static int nic_uio_modevent(module_t mod, int type, void *arg); + +static struct cdevsw uio_cdevsw = { + .d_name = "nic_uio", + .d_version = D_VERSION, + .d_open = nic_uio_open, + .d_close = nic_uio_close, + .d_mmap = nic_uio_mmap, + .d_mmap_single = nic_uio_mmap_single, +}; + +static device_method_t nic_uio_methods[] = { + DEVMETHOD(device_probe, nic_uio_probe), + DEVMETHOD(device_attach, nic_uio_attach), + DEVMETHOD(device_detach, nic_uio_detach), + DEVMETHOD_END +}; + +struct device { + int vend; + int dev; +}; + +struct pci_bdf { + uint32_t bus; + uint32_t devid; + uint32_t function; +}; + +static devclass_t nic_uio_devclass; + +DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc)); +DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0); + +static int +nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int prot, vm_memattr_t *memattr) +{ + *paddr = offset; + return 0; +} + +static int +nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, + struct vm_object **obj, int nprot) +{ + /* + * The BAR index is encoded in the offset. Divide the offset by + * PAGE_SIZE to get the index of the bar requested by the user + * app. + */ + unsigned bar = *offset/PAGE_SIZE; + struct nic_uio_softc *sc = cdev->si_drv1; + + if (bar >= MAX_BARS) + return EINVAL; + + if (sc->bar_res[bar] == NULL) { + sc->bar_id[bar] = PCIR_BAR(bar); + + if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4))) + sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT, + &sc->bar_id[bar], RF_ACTIVE); + else + sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY, + &sc->bar_id[bar], RF_ACTIVE); + } + if (sc->bar_res[bar] == NULL) + return ENXIO; + + sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]); + sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]); + + device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar, + sc->bar_start[bar], sc->bar_size[bar]); + + *offset = sc->bar_start[bar]; + *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, + curthread->td_ucred); + return 0; +} + + +int +nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + return 0; +} + +int +nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + return 0; +} + +static int +nic_uio_probe (device_t dev) +{ + int i; + unsigned int bus = pci_get_bus(dev); + unsigned int device = pci_get_slot(dev); + unsigned int function = pci_get_function(dev); + + for (i = 0; i < num_detached; i++) + if (bus == pci_get_bus(detached_devices[i]) && + device == pci_get_slot(detached_devices[i]) && + function == pci_get_function(detached_devices[i])) { + device_set_desc(dev, "DPDK PCI Device"); + return BUS_PROBE_SPECIFIC; + } + + return ENXIO; +} + +static int +nic_uio_attach(device_t dev) +{ + int i; + struct nic_uio_softc *sc; + + sc = device_get_softc(dev); + sc->dev_t = dev; + sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev), + UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); + if (sc->my_cdev == NULL) + return ENXIO; + sc->my_cdev->si_drv1 = sc; + + for (i = 0; i < MAX_BARS; i++) + sc->bar_res[i] = NULL; + + pci_enable_busmaster(dev); + + return 0; +} + +static int +nic_uio_detach(device_t dev) +{ + int i; + struct nic_uio_softc *sc; + sc = device_get_softc(dev); + + for (i = 0; i < MAX_BARS; i++) + if (sc->bar_res[i] != NULL) { + + if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4))) + bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i], + sc->bar_res[i]); + else + bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i], + sc->bar_res[i]); + } + + if (sc->my_cdev != NULL) + destroy_dev(sc->my_cdev); + return 0; +} + +static void +nic_uio_load(void) +{ + uint32_t bus, device, function; + device_t dev; + char bdf_str[256]; + char *token, *remaining; + + memset(bdf_str, 0, sizeof(bdf_str)); + TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); + remaining = bdf_str; + /* + * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f". + * But the code below does not try differentiate between : and , + * and just blindly uses 3 tokens at a time to construct a + * bus/device/function tuple. + * + * There is no checking on strtol() return values, but this should + * be OK. Worst case is it cannot convert and returns 0. This + * could give us a different BDF than intended, but as long as the + * PCI device/vendor ID does not match it will not matter. + */ + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + bus = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + device = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + function = strtol(token, NULL, 10); + + dev = pci_find_bsf(bus, device, function); + if (dev == NULL) + continue; + + if (num_detached < MAX_DETACHED_DEVICES) { + printf("nic_uio_load: detaching and storing dev=%p\n", + dev); + detached_devices[num_detached++] = dev; + } else { + printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n", + MAX_DETACHED_DEVICES, dev); + } + device_detach(dev); + } +} + +static void +nic_uio_unload(void) +{ + int i; + printf("nic_uio_unload: entered...\n"); + + for (i = 0; i < num_detached; i++) { + printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n", + detached_devices[i]); + device_probe_and_attach(detached_devices[i]); + printf("nic_uio_unload: done.\n"); + } + + printf("nic_uio_unload: leaving...\n"); +} + +static int +nic_uio_shutdown(void) +{ + return 0; +} + +static int +nic_uio_modevent(module_t mod, int type, void *arg) +{ + + switch (type) { + case MOD_LOAD: + nic_uio_load(); + break; + case MOD_UNLOAD: + nic_uio_unload(); + break; + case MOD_SHUTDOWN: + nic_uio_shutdown(); + break; + default: + break; + } + + return 0; +} diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile new file mode 100644 index 00000000..f5ea0ee8 --- /dev/null +++ b/lib/librte_eal/common/Makefile @@ -0,0 +1,61 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +INC := rte_branch_prediction.h rte_common.h +INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h +INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h +INC += rte_tailq.h rte_interrupts.h rte_alarm.h +INC += rte_string_fns.h rte_version.h +INC += rte_eal_memconfig.h rte_malloc_heap.h +INC += rte_hexdump.h rte_devargs.h rte_dev.h +INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h +INC += rte_malloc.h rte_keepalive.h rte_time.h + +ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y) +INC += rte_warnings.h +endif + +GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h +GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h +# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk +ARCH_DIR ?= $(RTE_ARCH) +ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)) + +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC)) +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \ + $(addprefix include/arch/$(ARCH_DIR)/,$(ARCH_INC)) +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/generic := \ + $(addprefix include/generic/,$(GENERIC_INC)) + +include $(RTE_SDK)/mk/rte.install.mk diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c new file mode 100644 index 00000000..23240efd --- /dev/null +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -0,0 +1,179 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rte_cpuflags.h" + +#include +#include +#include +#include +#include + +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef AT_PLATFORM +#define AT_PLATFORM 15 +#endif + +enum cpu_register_t { + REG_NONE = 0, + REG_HWCAP, + REG_HWCAP2, + REG_PLATFORM, + REG_MAX +}; + +typedef uint32_t hwcap_registers_t[REG_MAX]; + +/** + * Struct to hold a processor feature entry + */ +struct feature_entry { + uint32_t reg; + uint32_t bit; +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; +}; + +#define FEAT_DEF(name, reg, bit) \ + [RTE_CPUFLAG_##name] = {reg, bit, #name}, + +#ifdef RTE_ARCH_ARMv7 +#define PLATFORM_STR "v7l" +typedef Elf32_auxv_t _Elfx_auxv_t; + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(SWP, REG_HWCAP, 0) + FEAT_DEF(HALF, REG_HWCAP, 1) + FEAT_DEF(THUMB, REG_HWCAP, 2) + FEAT_DEF(A26BIT, REG_HWCAP, 3) + FEAT_DEF(FAST_MULT, REG_HWCAP, 4) + FEAT_DEF(FPA, REG_HWCAP, 5) + FEAT_DEF(VFP, REG_HWCAP, 6) + FEAT_DEF(EDSP, REG_HWCAP, 7) + FEAT_DEF(JAVA, REG_HWCAP, 8) + FEAT_DEF(IWMMXT, REG_HWCAP, 9) + FEAT_DEF(CRUNCH, REG_HWCAP, 10) + FEAT_DEF(THUMBEE, REG_HWCAP, 11) + FEAT_DEF(NEON, REG_HWCAP, 12) + FEAT_DEF(VFPv3, REG_HWCAP, 13) + FEAT_DEF(VFPv3D16, REG_HWCAP, 14) + FEAT_DEF(TLS, REG_HWCAP, 15) + FEAT_DEF(VFPv4, REG_HWCAP, 16) + FEAT_DEF(IDIVA, REG_HWCAP, 17) + FEAT_DEF(IDIVT, REG_HWCAP, 18) + FEAT_DEF(VFPD32, REG_HWCAP, 19) + FEAT_DEF(LPAE, REG_HWCAP, 20) + FEAT_DEF(EVTSTRM, REG_HWCAP, 21) + FEAT_DEF(AES, REG_HWCAP2, 0) + FEAT_DEF(PMULL, REG_HWCAP2, 1) + FEAT_DEF(SHA1, REG_HWCAP2, 2) + FEAT_DEF(SHA2, REG_HWCAP2, 3) + FEAT_DEF(CRC32, REG_HWCAP2, 4) + FEAT_DEF(V7L, REG_PLATFORM, 0) +}; + +#elif defined RTE_ARCH_ARM64 +#define PLATFORM_STR "aarch64" +typedef Elf64_auxv_t _Elfx_auxv_t; + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(FP, REG_HWCAP, 0) + FEAT_DEF(NEON, REG_HWCAP, 1) + FEAT_DEF(EVTSTRM, REG_HWCAP, 2) + FEAT_DEF(AES, REG_HWCAP, 3) + FEAT_DEF(PMULL, REG_HWCAP, 4) + FEAT_DEF(SHA1, REG_HWCAP, 5) + FEAT_DEF(SHA2, REG_HWCAP, 6) + FEAT_DEF(CRC32, REG_HWCAP, 7) + FEAT_DEF(ATOMICS, REG_HWCAP, 8) + FEAT_DEF(AARCH64, REG_PLATFORM, 1) +}; +#endif /* RTE_ARCH */ + +/* + * Read AUXV software register and get cpu features for ARM + */ +static void +rte_cpu_get_features(hwcap_registers_t out) +{ + int auxv_fd; + _Elfx_auxv_t auxv; + + auxv_fd = open("/proc/self/auxv", O_RDONLY); + assert(auxv_fd); + while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { + if (auxv.a_type == AT_HWCAP) { + out[REG_HWCAP] = auxv.a_un.a_val; + } else if (auxv.a_type == AT_HWCAP2) { + out[REG_HWCAP2] = auxv.a_un.a_val; + } else if (auxv.a_type == AT_PLATFORM) { + if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR)) + out[REG_PLATFORM] = 0x0001; + } + } +} + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + hwcap_registers_t regs = {0}; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + if (feat->reg == REG_NONE) + return -EFAULT; + + rte_cpu_get_features(regs); + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c new file mode 100644 index 00000000..a8147c86 --- /dev/null +++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c @@ -0,0 +1,147 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "rte_cpuflags.h" + +#include +#include +#include +#include + +/* Symbolic values for the entries in the auxiliary table */ +#define AT_HWCAP 16 +#define AT_HWCAP2 26 + +/* software based registers */ +enum cpu_register_t { + REG_NONE = 0, + REG_HWCAP, + REG_HWCAP2, + REG_MAX +}; + +typedef uint32_t hwcap_registers_t[REG_MAX]; + +struct feature_entry { + uint32_t reg; + uint32_t bit; +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; +}; + +#define FEAT_DEF(name, reg, bit) \ + [RTE_CPUFLAG_##name] = {reg, bit, #name}, + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(PPC_LE, REG_HWCAP, 0) + FEAT_DEF(TRUE_LE, REG_HWCAP, 1) + FEAT_DEF(PSERIES_PERFMON_COMPAT, REG_HWCAP, 6) + FEAT_DEF(VSX, REG_HWCAP, 7) + FEAT_DEF(ARCH_2_06, REG_HWCAP, 8) + FEAT_DEF(POWER6_EXT, REG_HWCAP, 9) + FEAT_DEF(DFP, REG_HWCAP, 10) + FEAT_DEF(PA6T, REG_HWCAP, 11) + FEAT_DEF(ARCH_2_05, REG_HWCAP, 12) + FEAT_DEF(ICACHE_SNOOP, REG_HWCAP, 13) + FEAT_DEF(SMT, REG_HWCAP, 14) + FEAT_DEF(BOOKE, REG_HWCAP, 15) + FEAT_DEF(CELLBE, REG_HWCAP, 16) + FEAT_DEF(POWER5_PLUS, REG_HWCAP, 17) + FEAT_DEF(POWER5, REG_HWCAP, 18) + FEAT_DEF(POWER4, REG_HWCAP, 19) + FEAT_DEF(NOTB, REG_HWCAP, 20) + FEAT_DEF(EFP_DOUBLE, REG_HWCAP, 21) + FEAT_DEF(EFP_SINGLE, REG_HWCAP, 22) + FEAT_DEF(SPE, REG_HWCAP, 23) + FEAT_DEF(UNIFIED_CACHE, REG_HWCAP, 24) + FEAT_DEF(4xxMAC, REG_HWCAP, 25) + FEAT_DEF(MMU, REG_HWCAP, 26) + FEAT_DEF(FPU, REG_HWCAP, 27) + FEAT_DEF(ALTIVEC, REG_HWCAP, 28) + FEAT_DEF(PPC601, REG_HWCAP, 29) + FEAT_DEF(PPC64, REG_HWCAP, 30) + FEAT_DEF(PPC32, REG_HWCAP, 31) + FEAT_DEF(TAR, REG_HWCAP2, 26) + FEAT_DEF(LSEL, REG_HWCAP2, 27) + FEAT_DEF(EBB, REG_HWCAP2, 28) + FEAT_DEF(DSCR, REG_HWCAP2, 29) + FEAT_DEF(HTM, REG_HWCAP2, 30) + FEAT_DEF(ARCH_2_07, REG_HWCAP2, 31) +}; + +/* + * Read AUXV software register and get cpu features for Power + */ +static void +rte_cpu_get_features(hwcap_registers_t out) +{ + int auxv_fd; + Elf64_auxv_t auxv; + + auxv_fd = open("/proc/self/auxv", O_RDONLY); + assert(auxv_fd); + while (read(auxv_fd, &auxv, + sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { + if (auxv.a_type == AT_HWCAP) + out[REG_HWCAP] = auxv.a_un.a_val; + else if (auxv.a_type == AT_HWCAP2) + out[REG_HWCAP2] = auxv.a_un.a_val; + } +} + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + hwcap_registers_t regs = {0}; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + if (feat->reg == REG_NONE) + return -EFAULT; + + rte_cpu_get_features(regs); + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/arch/tile/rte_cpuflags.c new file mode 100644 index 00000000..a2b6c51a --- /dev/null +++ b/lib/librte_eal/common/arch/tile/rte_cpuflags.c @@ -0,0 +1,47 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "rte_cpuflags.h" + +#include + +const struct feature_entry rte_cpu_feature_table[] = { +}; + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature) +{ + return -ENOENT; +} diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c new file mode 100644 index 00000000..01382571 --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c @@ -0,0 +1,220 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rte_cpuflags.h" + +#include +#include +#include + +enum cpu_register_t { + RTE_REG_EAX = 0, + RTE_REG_EBX, + RTE_REG_ECX, + RTE_REG_EDX, +}; + +typedef uint32_t cpuid_registers_t[4]; + +/** + * Struct to hold a processor feature entry + */ +struct feature_entry { + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ +}; + +#define FEAT_DEF(name, leaf, subleaf, reg, bit) \ + [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name }, + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX, 1) + FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX, 2) + FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX, 3) + FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX, 4) + FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX, 5) + FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX, 6) + FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX, 7) + FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX, 8) + FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX, 9) + FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10) + FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12) + FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13) + FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14) + FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15) + FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17) + FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18) + FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19) + FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20) + FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21) + FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22) + FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23) + FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24) + FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25) + FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26) + FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27) + FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28) + FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29) + FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30) + + FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0) + FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1) + FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX, 2) + FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX, 3) + FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX, 4) + FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX, 5) + FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX, 6) + FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX, 7) + FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX, 8) + FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX, 9) + FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12) + FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13) + FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14) + FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15) + FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16) + FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17) + FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18) + FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19) + FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21) + FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22) + FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23) + FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24) + FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25) + FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28) + FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29) + FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31) + + FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX, 0) + FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX, 1) + FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX, 2) + FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX, 4) + FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX, 5) + FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX, 6) + + FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX, 0) + FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX, 1) + FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3) + + FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0) + FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2) + FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4) + FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5) + FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6) + FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7) + FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8) + FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10) + FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11) + FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16) + + FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4) + + FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20) + FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29) + + FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) +}; + +/* + * Execute CPUID instruction and get contents of a specific register + * + * This function, when compiled with GCC, will generate architecture-neutral + * code, as per GCC manual. + */ +static void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) +{ +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" + : "=r" (out[RTE_REG_EBX]), + "=a" (out[RTE_REG_EAX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); +#else + asm volatile("cpuid" + : "=a" (out[RTE_REG_EAX]), + "=b" (out[RTE_REG_EBX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); +#endif +} + +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + cpuid_registers_t regs; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + /* Flag does not match anything in the feature tables */ + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + + if (!feat->leaf) + /* This entry in the table wasn't filled out! */ + return -EFAULT; + + rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); + if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || + regs[RTE_REG_EAX] < feat->leaf) + return 0; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, regs); + + /* check if the feature is enabled */ + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c new file mode 100644 index 00000000..ecb12409 --- /dev/null +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include + +/** + * Checks if the machine is adequate for running the binary. If it is not, the + * program exits with status 1. + * The function attribute forces this function to be called before main(). But + * with ICC, the check is generated by the compiler. + */ +#ifndef __INTEL_COMPILER +void __attribute__ ((__constructor__)) +#else +void +#endif +rte_cpu_check_supported(void) +{ + /* This is generated at compile-time by the build system */ + static const enum rte_cpu_flag_t compile_time_flags[] = { + RTE_COMPILE_TIME_CPUFLAGS + }; + unsigned count = RTE_DIM(compile_time_flags), i; + int ret; + + for (i = 0; i < count; i++) { + ret = rte_cpu_get_flag_enabled(compile_time_flags[i]); + + if (ret < 0) { + fprintf(stderr, + "ERROR: CPU feature flag lookup failed with error %d\n", + ret); + exit(1); + } + if (!ret) { + fprintf(stderr, + "ERROR: This system does not support \"%s\".\n" + "Please check that RTE_MACHINE is set correctly.\n", + rte_cpu_get_flag_name(compile_time_flags[i])); + exit(1); + } + } +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c new file mode 100644 index 00000000..a8a4146c --- /dev/null +++ b/lib/librte_eal/common/eal_common_dev.c @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" + +/** Global list of device drivers. */ +static struct rte_driver_list dev_driver_list = + TAILQ_HEAD_INITIALIZER(dev_driver_list); + +/* register a driver */ +void +rte_eal_driver_register(struct rte_driver *driver) +{ + TAILQ_INSERT_TAIL(&dev_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_driver_unregister(struct rte_driver *driver) +{ + TAILQ_REMOVE(&dev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->init(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_dev_init(void) +{ + struct rte_devargs *devargs; + struct rte_driver *driver; + + /* + * Note that the dev_driver_list is populated here + * from calls made to rte_eal_driver_register from constructor functions + * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + */ + + /* call the init function for each virtual device */ + TAILQ_FOREACH(devargs, &devargs_list, next) { + + if (devargs->type != RTE_DEVTYPE_VIRTUAL) + continue; + + if (rte_eal_vdev_init(devargs->virt.drv_name, + devargs->args)) { + RTE_LOG(ERR, EAL, "failed to initialize %s device\n", + devargs->virt.drv_name); + return -1; + } + } + + /* Once the vdevs are initalized, start calling all the pdev drivers */ + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_PDEV) + continue; + /* PDEV drivers don't get passed any parameters */ + driver->init(NULL, NULL); + } + return 0; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->uninit(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c new file mode 100644 index 00000000..2bfe54a1 --- /dev/null +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -0,0 +1,176 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This file manages the list of devices and their arguments, as given + * by the user at startup + * + * Code here should not call rte_log since the EAL environment + * may not be initialized. + */ + +#include +#include + +#include +#include +#include "eal_private.h" + +/** Global list of user devices */ +struct rte_devargs_list devargs_list = + TAILQ_HEAD_INITIALIZER(devargs_list); + +int +rte_eal_parse_devargs_str(const char *devargs_str, + char **drvname, char **drvargs) +{ + char *sep; + + if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL)) + return -1; + + *drvname = strdup(devargs_str); + if (drvname == NULL) + return -1; + + /* set the first ',' to '\0' to split name and arguments */ + sep = strchr(*drvname, ','); + if (sep != NULL) { + sep[0] = '\0'; + *drvargs = strdup(sep + 1); + } else { + *drvargs = strdup(""); + } + + if (*drvargs == NULL) { + free(*drvname); + return -1; + } + return 0; +} + +/* store a whitelist parameter for later parsing */ +int +rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) +{ + struct rte_devargs *devargs = NULL; + char *buf = NULL; + int ret; + + /* use malloc instead of rte_malloc as it's called early at init */ + devargs = malloc(sizeof(*devargs)); + if (devargs == NULL) + goto fail; + + memset(devargs, 0, sizeof(*devargs)); + devargs->type = devtype; + + if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args)) + goto fail; + + switch (devargs->type) { + case RTE_DEVTYPE_WHITELISTED_PCI: + case RTE_DEVTYPE_BLACKLISTED_PCI: + /* try to parse pci identifier */ + if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 && + eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0) + goto fail; + + break; + case RTE_DEVTYPE_VIRTUAL: + /* save driver name */ + ret = snprintf(devargs->virt.drv_name, + sizeof(devargs->virt.drv_name), "%s", buf); + if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) + goto fail; + + break; + } + + free(buf); + TAILQ_INSERT_TAIL(&devargs_list, devargs, next); + return 0; + +fail: + free(buf); + if (devargs) { + free(devargs->args); + free(devargs); + } + + return -1; +} + +/* count the number of devices of a specified type */ +unsigned int +rte_eal_devargs_type_count(enum rte_devtype devtype) +{ + struct rte_devargs *devargs; + unsigned int count = 0; + + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type != devtype) + continue; + count++; + } + return count; +} + +/* dump the user devices on the console */ +void +rte_eal_devargs_dump(FILE *f) +{ + struct rte_devargs *devargs; + + fprintf(f, "User device white list:\n"); + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n", + devargs->pci.addr.domain, + devargs->pci.addr.bus, + devargs->pci.addr.devid, + devargs->pci.addr.function, + devargs->args); + else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) + fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n", + devargs->pci.addr.domain, + devargs->pci.addr.bus, + devargs->pci.addr.devid, + devargs->pci.addr.function, + devargs->args); + else if (devargs->type == RTE_DEVTYPE_VIRTUAL) + fprintf(f, " VIRTUAL %s %s\n", + devargs->virt.drv_name, + devargs->args); + else + fprintf(f, " UNKNOWN %s\n", devargs->args); + } +} diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c new file mode 100644 index 00000000..de48d8e4 --- /dev/null +++ b/lib/librte_eal/common/eal_common_errno.c @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +RTE_DEFINE_PER_LCORE(int, _rte_errno); + +const char * +rte_strerror(int errnum) +{ +#define RETVAL_SZ 256 + static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval); + + /* since some implementations of strerror_r throw an error + * themselves if errnum is too big, we handle that case here */ + if (errnum > RTE_MAX_ERRNO) + snprintf(RTE_PER_LCORE(retval), RETVAL_SZ, +#ifdef RTE_EXEC_ENV_BSDAPP + "Unknown error: %d", errnum); +#else + "Unknown error %d", errnum); +#endif + else + switch (errnum){ + case E_RTE_SECONDARY: + return "Invalid call in secondary process"; + case E_RTE_NO_CONFIG: + return "Missing rte_config structure"; + default: + strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ); + } + + return RTE_PER_LCORE(retval); +} diff --git a/lib/librte_eal/common/eal_common_hexdump.c b/lib/librte_eal/common/eal_common_hexdump.c new file mode 100644 index 00000000..d5cbd703 --- /dev/null +++ b/lib/librte_eal/common/eal_common_hexdump.c @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include + +#define LINE_LEN 128 + +/**************************************************************************//** +* +* rte_hexdump - Dump out memory in a special hex dump format. +* +* DESCRIPTION +* Dump out the message buffer in a special hex dump output format with characters +* printed for each line of 16 hex values. +* +* RETURNS: N/A +* +* SEE ALSO: +*/ + +void +rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len) +{ + unsigned int i, out, ofs; + const unsigned char *data = buf; + char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */ + + fprintf(f, "%s at [%p], len=%u\n", (title)? title : " Dump data", data, len); + ofs = 0; + while (ofs < len) { + /* format the line in the buffer, then use printf to output to screen */ + out = snprintf(line, LINE_LEN, "%08X:", ofs); + for (i = 0; ((ofs + i) < len) && (i < 16); i++) + out += snprintf(line+out, LINE_LEN - out, " %02X", (data[ofs+i] & 0xff)); + for(; i <= 16; i++) + out += snprintf(line+out, LINE_LEN - out, " | "); + for(i = 0; (ofs < len) && (i < 16); i++, ofs++) { + unsigned char c = data[ofs]; + if ( (c < ' ') || (c > '~')) + c = '.'; + out += snprintf(line+out, LINE_LEN - out, "%c", c); + } + fprintf(f, "%s\n", line); + } + fflush(f); +} + +/**************************************************************************//** +* +* rte_memdump - Dump out memory in hex bytes with colons. +* +* DESCRIPTION +* Dump out the message buffer in hex bytes with colons xx:xx:xx:xx:... +* +* RETURNS: N/A +* +* SEE ALSO: +*/ + +void +rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len) +{ + unsigned int i, out; + const unsigned char *data = buf; + char line[LINE_LEN]; + + if ( title ) + fprintf(f, "%s: ", title); + + line[0] = '\0'; + for (i = 0, out = 0; i < len; i++) { + // Make sure we do not overrun the line buffer length. + if ( out >= (LINE_LEN - 4) ) { + fprintf(f, "%s", line); + out = 0; + line[out] = '\0'; + } + out += snprintf(line+out, LINE_LEN - out, "%02x%s", + (data[i] & 0xff), ((i+1) < len)? ":" : ""); + } + if ( out > 0 ) + fprintf(f, "%s", line); + fprintf(f, "\n"); + + fflush(f); +} diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c new file mode 100644 index 00000000..229c3a03 --- /dev/null +++ b/lib/librte_eal/common/eal_common_launch.c @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Wait until a lcore finished its job. + */ +int +rte_eal_wait_lcore(unsigned slave_id) +{ + if (lcore_config[slave_id].state == WAIT) + return 0; + + while (lcore_config[slave_id].state != WAIT && + lcore_config[slave_id].state != FINISHED); + + rte_rmb(); + + /* we are in finished state, go to wait state */ + lcore_config[slave_id].state = WAIT; + return lcore_config[slave_id].ret; +} + +/* + * Check that every SLAVE lcores are in WAIT state, then call + * rte_eal_remote_launch() for all of them. If call_master is true + * (set to CALL_MASTER), also call the function on the master lcore. + */ +int +rte_eal_mp_remote_launch(int (*f)(void *), void *arg, + enum rte_rmt_call_master_t call_master) +{ + int lcore_id; + int master = rte_get_master_lcore(); + + /* check state of lcores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_config[lcore_id].state != WAIT) + return -EBUSY; + } + + /* send messages to cores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(f, arg, lcore_id); + } + + if (call_master == CALL_MASTER) { + lcore_config[master].ret = f(arg); + lcore_config[master].state = FINISHED; + } + + return 0; +} + +/* + * Return the state of the lcore identified by slave_id. + */ +enum rte_lcore_state_t +rte_eal_get_lcore_state(unsigned lcore_id) +{ + return lcore_config[lcore_id].state; +} + +/* + * Do a rte_eal_wait_lcore() for every lcore. The return values are + * ignored. + */ +void +rte_eal_mp_wait_lcore(void) +{ + unsigned lcore_id; + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_wait_lcore(lcore_id); + } +} diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c new file mode 100644 index 00000000..a4263ba5 --- /dev/null +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +/* + * Parse /sys/devices/system/cpu to get the number of physical and logical + * processors on the machine. The function will fill the cpu_info + * structure. + */ +int +rte_eal_cpu_init(void) +{ + /* pointer to global configuration */ + struct rte_config *config = rte_eal_get_configuration(); + unsigned lcore_id; + unsigned count = 0; + + /* + * Parse the maximum set of logical cores, detect the subset of running + * ones and enable them by default. + */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + lcore_config[lcore_id].core_index = count; + + /* init cpuset for per lcore config */ + CPU_ZERO(&lcore_config[lcore_id].cpuset); + + /* in 1:1 mapping, record related cpu detected state */ + lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id); + if (lcore_config[lcore_id].detected == 0) { + config->lcore_role[lcore_id] = ROLE_OFF; + lcore_config[lcore_id].core_index = -1; + continue; + } + + /* By default, lcore 1:1 map to cpu id */ + CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset); + + /* By default, each detected core is enabled */ + config->lcore_role[lcore_id] = ROLE_RTE; + lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); + lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); + if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) +#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID + lcore_config[lcore_id].socket_id = 0; +#else + rte_panic("Socket ID (%u) is greater than " + "RTE_MAX_NUMA_NODES (%d)\n", + lcore_config[lcore_id].socket_id, + RTE_MAX_NUMA_NODES); +#endif + + RTE_LOG(DEBUG, EAL, "Detected lcore %u as " + "core %u on socket %u\n", + lcore_id, lcore_config[lcore_id].core_id, + lcore_config[lcore_id].socket_id); + count++; + } + /* Set the count of enabled logical cores of the EAL configuration */ + config->lcore_count = count; + RTE_LOG(DEBUG, EAL, + "Support maximum %u logical core(s) by configuration.\n", + RTE_MAX_LCORE); + RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count); + + return 0; +} diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c new file mode 100644 index 00000000..1ae8de70 --- /dev/null +++ b/lib/librte_eal/common/eal_common_log.c @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +#define LOG_ELT_SIZE 2048 + +#define LOG_HISTORY_MP_NAME "log_history" + +STAILQ_HEAD(log_history_list, log_history); + +/** + * The structure of a message log in the log history. + */ +struct log_history { + STAILQ_ENTRY(log_history) next; + unsigned size; + char buf[0]; +}; + +static struct rte_mempool *log_history_mp = NULL; +static unsigned log_history_size = 0; +static struct log_history_list log_history; + +/* global log structure */ +struct rte_logs rte_logs = { + .type = ~0, + .level = RTE_LOG_DEBUG, + .file = NULL, +}; + +static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER; +static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER; +static FILE *default_log_stream; +static int history_enabled = 1; + +/** + * This global structure stores some informations about the message + * that is currently beeing processed by one lcore + */ +struct log_cur_msg { + uint32_t loglevel; /**< log level - see rte_log.h */ + uint32_t logtype; /**< log type - see rte_log.h */ +} __rte_cache_aligned; +static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */ + + +/* default logs */ + +int +rte_log_add_in_history(const char *buf, size_t size) +{ + struct log_history *hist_buf = NULL; + static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf); + void *obj; + + if (history_enabled == 0) + return 0; + + rte_spinlock_lock(&log_list_lock); + + /* get a buffer for adding in history */ + if (log_history_size > RTE_LOG_HISTORY) { + hist_buf = STAILQ_FIRST(&log_history); + if (hist_buf) { + STAILQ_REMOVE_HEAD(&log_history, next); + log_history_size--; + } + } + else { + if (rte_mempool_mc_get(log_history_mp, &obj) < 0) + obj = NULL; + hist_buf = obj; + } + + /* no buffer */ + if (hist_buf == NULL) { + rte_spinlock_unlock(&log_list_lock); + return -ENOBUFS; + } + + /* not enough room for msg, buffer go back in mempool */ + if (size >= hist_buf_size) { + rte_mempool_mp_put(log_history_mp, hist_buf); + rte_spinlock_unlock(&log_list_lock); + return -ENOBUFS; + } + + /* add in history */ + memcpy(hist_buf->buf, buf, size); + hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0'; + hist_buf->size = size; + STAILQ_INSERT_TAIL(&log_history, hist_buf, next); + log_history_size++; + rte_spinlock_unlock(&log_list_lock); + + return 0; +} + +void +rte_log_set_history(int enable) +{ + history_enabled = enable; +} + +/* Change the stream that will be used by logging system */ +int +rte_openlog_stream(FILE *f) +{ + if (f == NULL) + rte_logs.file = default_log_stream; + else + rte_logs.file = f; + return 0; +} + +/* Set global log level */ +void +rte_set_log_level(uint32_t level) +{ + rte_logs.level = (uint32_t)level; +} + +/* Get global log level */ +uint32_t +rte_get_log_level(void) +{ + return rte_logs.level; +} + +/* Set global log type */ +void +rte_set_log_type(uint32_t type, int enable) +{ + if (enable) + rte_logs.type |= type; + else + rte_logs.type &= (~type); +} + +/* Get global log type */ +uint32_t +rte_get_log_type(void) +{ + return rte_logs.type; +} + +/* get the current loglevel for the message beeing processed */ +int rte_log_cur_msg_loglevel(void) +{ + unsigned lcore_id; + lcore_id = rte_lcore_id(); + if (lcore_id >= RTE_MAX_LCORE) + return rte_get_log_level(); + return log_cur_msg[lcore_id].loglevel; +} + +/* get the current logtype for the message beeing processed */ +int rte_log_cur_msg_logtype(void) +{ + unsigned lcore_id; + lcore_id = rte_lcore_id(); + if (lcore_id >= RTE_MAX_LCORE) + return rte_get_log_type(); + return log_cur_msg[lcore_id].logtype; +} + +/* Dump log history to file */ +void +rte_log_dump_history(FILE *out) +{ + struct log_history_list tmp_log_history; + struct log_history *hist_buf; + unsigned i; + + /* only one dump at a time */ + rte_spinlock_lock(&log_dump_lock); + + /* save list, and re-init to allow logging during dump */ + rte_spinlock_lock(&log_list_lock); + tmp_log_history = log_history; + STAILQ_INIT(&log_history); + log_history_size = 0; + rte_spinlock_unlock(&log_list_lock); + + for (i=0; ibuf, hist_buf->size, 1, out) == 0) { + rte_mempool_mp_put(log_history_mp, hist_buf); + break; + } + + /* put back message structure in pool */ + rte_mempool_mp_put(log_history_mp, hist_buf); + } + fflush(out); + + rte_spinlock_unlock(&log_dump_lock); +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + */ +int +rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) +{ + int ret; + FILE *f = rte_logs.file; + unsigned lcore_id; + + if ((level > rte_logs.level) || !(logtype & rte_logs.type)) + return 0; + + /* save loglevel and logtype in a global per-lcore variable */ + lcore_id = rte_lcore_id(); + if (lcore_id < RTE_MAX_LCORE) { + log_cur_msg[lcore_id].loglevel = level; + log_cur_msg[lcore_id].logtype = logtype; + } + + ret = vfprintf(f, format, ap); + fflush(f); + return ret; +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + * No need to check level here, done by rte_vlog(). + */ +int +rte_log(uint32_t level, uint32_t logtype, const char *format, ...) +{ + va_list ap; + int ret; + + va_start(ap, format); + ret = rte_vlog(level, logtype, format, ap); + va_end(ap); + return ret; +} + +/* + * called by environment-specific log init function to initialize log + * history + */ +int +rte_eal_common_log_init(FILE *default_log) +{ + STAILQ_INIT(&log_history); + + /* reserve RTE_LOG_HISTORY*2 elements, so we can dump and + * keep logging during this time */ + log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2, + LOG_ELT_SIZE, 0, 0, + NULL, NULL, + NULL, NULL, + SOCKET_ID_ANY, 0); + + if ((log_history_mp == NULL) && + ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){ + RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n", + __func__); + return -1; + } + + default_log_stream = default_log; + rte_openlog_stream(default_log); + return 0; +} diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c new file mode 100644 index 00000000..6155752e --- /dev/null +++ b/lib/librte_eal/common/eal_common_memory.c @@ -0,0 +1,154 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +/* + * Return a pointer to a read-only table of struct rte_physmem_desc + * elements, containing the layout of all addressable physical + * memory. The last element of the table contains a NULL address. + */ +const struct rte_memseg * +rte_eal_get_physmem_layout(void) +{ + return rte_eal_get_configuration()->mem_config->memseg; +} + + +/* get the total size of memory */ +uint64_t +rte_eal_get_physmem_size(void) +{ + const struct rte_mem_config *mcfg; + unsigned i = 0; + uint64_t total_len = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (mcfg->memseg[i].addr == NULL) + break; + + total_len += mcfg->memseg[i].len; + } + + return total_len; +} + +/* Dump the physical memory layout on console */ +void +rte_dump_physmem_layout(FILE *f) +{ + const struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (mcfg->memseg[i].addr == NULL) + break; + + fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, " + "virt:%p, socket_id:%"PRId32", " + "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " + "nrank:%"PRIx32"\n", i, + mcfg->memseg[i].phys_addr, + mcfg->memseg[i].len, + mcfg->memseg[i].addr, + mcfg->memseg[i].socket_id, + mcfg->memseg[i].hugepage_sz, + mcfg->memseg[i].nchannel, + mcfg->memseg[i].nrank); + } +} + +/* return the number of memory channels */ +unsigned rte_memory_get_nchannel(void) +{ + return rte_eal_get_configuration()->mem_config->nchannel; +} + +/* return the number of memory rank */ +unsigned rte_memory_get_nrank(void) +{ + return rte_eal_get_configuration()->mem_config->nrank; +} + +static int +rte_eal_memdevice_init(void) +{ + struct rte_config *config; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + config = rte_eal_get_configuration(); + config->mem_config->nchannel = internal_config.force_nchannel; + config->mem_config->nrank = internal_config.force_nrank; + + return 0; +} + +/* init memory subsystem */ +int +rte_eal_memory_init(void) +{ + RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); + + const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? + rte_eal_hugepage_init() : + rte_eal_hugepage_attach(); + if (retval < 0) + return -1; + + if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) + return -1; + + return 0; +} diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c new file mode 100644 index 00000000..711c8457 --- /dev/null +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -0,0 +1,445 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_heap.h" +#include "malloc_elem.h" +#include "eal_private.h" + +static inline const struct rte_memzone * +memzone_lookup_thread_unsafe(const char *name) +{ + const struct rte_mem_config *mcfg; + const struct rte_memzone *mz; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* + * the algorithm is not optimal (linear), but there are few + * zones and this function should be called at init only + */ + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + mz = &mcfg->memzone[i]; + if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return &mcfg->memzone[i]; + } + + return NULL; +} + +static inline struct rte_memzone * +get_next_free_memzone(void) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + if (mcfg->memzone[i].addr == NULL) + return &mcfg->memzone[i]; + } + + return NULL; +} + +/* This function will return the greatest free block if a heap has been + * specified. If no heap has been specified, it will return the heap and + * length of the greatest free block available in all heaps */ +static size_t +find_heap_max_free_elem(int *s, unsigned align) +{ + struct rte_mem_config *mcfg; + struct rte_malloc_socket_stats stats; + int i, socket = *s; + size_t len = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if ((socket != SOCKET_ID_ANY) && (socket != i)) + continue; + + malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats); + if (stats.greatest_free_size > len) { + len = stats.greatest_free_size; + *s = i; + } + } + + return len - MALLOC_ELEM_OVERHEAD - align; +} + +static const struct rte_memzone * +memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, + int socket_id, unsigned flags, unsigned align, unsigned bound) +{ + struct rte_mem_config *mcfg; + size_t requested_len; + int socket, i; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* no more room in config */ + if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { + RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); + rte_errno = ENOSPC; + return NULL; + } + + /* zone already exist */ + if ((memzone_lookup_thread_unsafe(name)) != NULL) { + RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n", + __func__, name); + rte_errno = EEXIST; + return NULL; + } + + /* if alignment is not a power of two */ + if (align && !rte_is_power_of_2(align)) { + RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__, + align); + rte_errno = EINVAL; + return NULL; + } + + /* alignment less than cache size is not allowed */ + if (align < RTE_CACHE_LINE_SIZE) + align = RTE_CACHE_LINE_SIZE; + + /* align length on cache boundary. Check for overflow before doing so */ + if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { + rte_errno = EINVAL; /* requested size too big */ + return NULL; + } + + len += RTE_CACHE_LINE_MASK; + len &= ~((size_t) RTE_CACHE_LINE_MASK); + + /* save minimal requested length */ + requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); + + /* check that boundary condition is valid */ + if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { + rte_errno = EINVAL; + return NULL; + } + + if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) { + rte_errno = EINVAL; + return NULL; + } + + if (!rte_eal_has_hugepages()) + socket_id = SOCKET_ID_ANY; + + if (len == 0) { + if (bound != 0) + requested_len = bound; + else + requested_len = find_heap_max_free_elem(&socket_id, align); + } + + if (socket_id == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_id; + + /* allocate memory on heap */ + void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, + requested_len, flags, align, bound); + + if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { + /* try other heaps */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if (socket == i) + continue; + + mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], + NULL, requested_len, flags, align, bound); + if (mz_addr != NULL) + break; + } + } + + if (mz_addr == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); + + /* fill the zone in config */ + struct rte_memzone *mz = get_next_free_memzone(); + + if (mz == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " + "in config!\n", __func__); + rte_errno = ENOSPC; + return NULL; + } + + mcfg->memzone_cnt++; + snprintf(mz->name, sizeof(mz->name), "%s", name); + mz->phys_addr = rte_malloc_virt2phy(mz_addr); + mz->addr = mz_addr; + mz->len = (requested_len == 0 ? elem->size : requested_len); + mz->hugepage_sz = elem->ms->hugepage_sz; + mz->socket_id = elem->ms->socket_id; + mz->flags = 0; + mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg; + + return mz; +} + +static const struct rte_memzone * +rte_memzone_reserve_thread_safe(const char *name, size_t len, + int socket_id, unsigned flags, unsigned align, + unsigned bound) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *mz = NULL; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + mz = memzone_reserve_aligned_thread_unsafe( + name, len, socket_id, flags, align, bound); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return mz; +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment and boundary). If the allocation cannot be done, + * return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, bound); +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment). If the allocation cannot be done, return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, 0); +} + +/* + * Return a pointer to a correctly filled memzone descriptor. If the + * allocation cannot be done, return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve(const char *name, size_t len, int socket_id, + unsigned flags) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, + flags, RTE_CACHE_LINE_SIZE, 0); +} + +int +rte_memzone_free(const struct rte_memzone *mz) +{ + struct rte_mem_config *mcfg; + int ret = 0; + void *addr; + unsigned idx; + + if (mz == NULL) + return -EINVAL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); + idx = idx / sizeof(struct rte_memzone); + + addr = mcfg->memzone[idx].addr; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot + * free it. + */ + if (mcfg->memzone[idx].ioremap_addr != 0) + ret = -EINVAL; +#endif + if (addr == NULL) + ret = -EINVAL; + else if (mcfg->memzone_cnt == 0) { + rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", + __func__); + } else { + memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); + mcfg->memzone_cnt--; + } + + rte_rwlock_write_unlock(&mcfg->mlock); + + rte_free(addr); + + return ret; +} + +/* + * Lookup for the memzone identified by the given name + */ +const struct rte_memzone * +rte_memzone_lookup(const char *name) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *memzone = NULL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + + memzone = memzone_lookup_thread_unsafe(name); + + rte_rwlock_read_unlock(&mcfg->mlock); + + return memzone; +} + +/* Dump all reserved memory zones on console */ +void +rte_memzone_dump(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + /* dump all zones */ + for (i=0; imemzone[i].addr == NULL) + break; + fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx" + ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, + mcfg->memzone[i].name, + mcfg->memzone[i].phys_addr, + mcfg->memzone[i].len, + mcfg->memzone[i].addr, + mcfg->memzone[i].socket_id, + mcfg->memzone[i].flags); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} + +/* + * Init the memzone subsystem + */ +int +rte_eal_memzone_init(void) +{ + struct rte_mem_config *mcfg; + const struct rte_memseg *memseg; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* secondary processes don't need to initialise anything */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + memseg = rte_eal_get_physmem_layout(); + if (memseg == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__); + return -1; + } + + rte_rwlock_write_lock(&mcfg->mlock); + + /* delete all zones */ + mcfg->memzone_cnt = 0; + memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return rte_eal_malloc_heap_init(); +} + +/* Walk all reserved memory zones */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), + void *arg) +{ + struct rte_mem_config *mcfg; + unsigned i; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + for (i=0; imemzone[i].addr != NULL) + (*func)(&mcfg->memzone[i], arg); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c new file mode 100644 index 00000000..2b418d52 --- /dev/null +++ b/lib/librte_eal/common/eal_common_options.c @@ -0,0 +1,1022 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_options.h" +#include "eal_filesystem.h" + +#define BITS_PER_HEX 4 + +const char +eal_short_options[] = + "b:" /* pci-blacklist */ + "c:" /* coremask */ + "d:" /* driver */ + "h" /* help */ + "l:" /* corelist */ + "m:" /* memory size */ + "n:" /* memory channels */ + "r:" /* memory ranks */ + "v" /* version */ + "w:" /* pci-whitelist */ + ; + +const struct option +eal_long_options[] = { + {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM }, + {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM }, + {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM }, + {OPT_HELP, 0, NULL, OPT_HELP_NUM }, + {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, + {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, + {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, + {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, + {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, + {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, + {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, + {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, + {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, + {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, + {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, + {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, + {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, + {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {0, 0, NULL, 0 } +}; + +TAILQ_HEAD(shared_driver_list, shared_driver); + +/* Definition for shared object drivers. */ +struct shared_driver { + TAILQ_ENTRY(shared_driver) next; + + char name[PATH_MAX]; + void* lib_handle; +}; + +/* List of external loadable drivers */ +static struct shared_driver_list solib_list = +TAILQ_HEAD_INITIALIZER(solib_list); + +/* Default path of external loadable drivers */ +static const char *default_solib_dir = RTE_EAL_PMD_PATH; + +static int master_lcore_parsed; +static int mem_parsed; + +void +eal_reset_internal_config(struct internal_config *internal_cfg) +{ + int i; + + internal_cfg->memory = 0; + internal_cfg->force_nrank = 0; + internal_cfg->force_nchannel = 0; + internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; + internal_cfg->hugepage_dir = NULL; + internal_cfg->force_sockets = 0; + /* zero out the NUMA config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_mem[i] = 0; + /* zero out hugedir descriptors */ + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + internal_cfg->hugepage_info[i].lock_descriptor = -1; + internal_cfg->base_virtaddr = 0; + + internal_cfg->syslog_facility = LOG_DAEMON; + /* default value from build option */ + internal_cfg->log_level = RTE_LOG_LEVEL; + + internal_cfg->xen_dom0_support = 0; + + /* if set to NONE, interrupt mode is determined automatically */ + internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; + +#ifdef RTE_LIBEAL_USE_HPET + internal_cfg->no_hpet = 0; +#else + internal_cfg->no_hpet = 1; +#endif + internal_cfg->vmware_tsc_map = 0; + internal_cfg->create_uio_dev = 0; +} + +static int +eal_plugin_add(const char *path) +{ + struct shared_driver *solib; + + solib = malloc(sizeof(*solib)); + if (solib == NULL) { + RTE_LOG(ERR, EAL, "malloc(solib) failed\n"); + return -1; + } + memset(solib, 0, sizeof(*solib)); + strncpy(solib->name, path, PATH_MAX-1); + solib->name[PATH_MAX-1] = 0; + TAILQ_INSERT_TAIL(&solib_list, solib, next); + + return 0; +} + +static int +eal_plugindir_init(const char *path) +{ + DIR *d = NULL; + struct dirent *dent = NULL; + char sopath[PATH_MAX]; + + if (path == NULL || *path == '\0') + return 0; + + d = opendir(path); + if (d == NULL) { + RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n", + path, strerror(errno)); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + struct stat sb; + + snprintf(sopath, PATH_MAX-1, "%s/%s", path, dent->d_name); + sopath[PATH_MAX-1] = 0; + + if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode))) + continue; + + if (eal_plugin_add(sopath) == -1) + break; + } + + closedir(d); + /* XXX this ignores failures from readdir() itself */ + return (dent == NULL) ? 0 : -1; +} + +int +eal_plugins_init(void) +{ + struct shared_driver *solib = NULL; + + if (*default_solib_dir != '\0') + eal_plugin_add(default_solib_dir); + + TAILQ_FOREACH(solib, &solib_list, next) { + struct stat sb; + + if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (eal_plugindir_init(solib->name) == -1) { + RTE_LOG(ERR, EAL, + "Cannot init plugin directory %s\n", + solib->name); + return -1; + } + } else { + RTE_LOG(DEBUG, EAL, "open shared lib %s\n", + solib->name); + solib->lib_handle = dlopen(solib->name, RTLD_NOW); + if (solib->lib_handle == NULL) { + RTE_LOG(ERR, EAL, "%s\n", dlerror()); + return -1; + } + } + + } + return 0; +} + +/* + * Parse the coremask given as argument (hexadecimal string) and fill + * the global configuration (core role and core count) with the parsed + * value. + */ +static int xdigit2val(unsigned char c) +{ + int val; + + if (isdigit(c)) + val = c - '0'; + else if (isupper(c)) + val = c - 'A' + 10; + else + val = c - 'a' + 10; + return val; +} + +static int +eal_parse_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. + */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) + { + if ((1 << j) & val) { + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "lcore %u " + "unavailable\n", idx); + return -1; + } + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } else { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + } + } + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + for (; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + if (count == 0) + return -1; + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + return 0; +} + +static int +eal_parse_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Reset config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_RTE) { + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + + return 0; +} + +/* Changes the lcore id of the master thread */ +static int +eal_parse_master_lcore(const char *arg) +{ + char *parsing_end; + struct rte_config *cfg = rte_eal_get_configuration(); + + errno = 0; + cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0); + if (errno || parsing_end[0] != 0) + return -1; + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; + master_lcore_parsed = 1; + return 0; +} + +/* + * Parse elem, the elem could be single number/range or '(' ')' group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6) + * Within group elem, '-' used for a range separator; + * ',' used for a single number. + */ +static int +eal_parse_set(const char *input, uint16_t set[], unsigned num) +{ + unsigned idx; + const char *str = input; + char *end = NULL; + unsigned min, max; + + memset(set, 0, num * sizeof(uint16_t)); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if ((!isdigit(*str) && *str != '(') || *str == '\0') + return -1; + + /* process single number or single range of number */ + if (*str != '(') { + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + else { + while (isblank(*end)) + end++; + + min = idx; + max = idx; + if (*end == '-') { + /* process single - */ + end++; + while (isblank(*end)) + end++; + if (!isdigit(*end)) + return -1; + + errno = 0; + idx = strtoul(end, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + max = idx; + while (isblank(*end)) + end++; + if (*end != ',' && *end != '\0') + return -1; + } + + if (*end != ',' && *end != '\0' && + *end != '@') + return -1; + + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + return end - input; + } + } + + /* process set within bracket */ + str++; + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = RTE_MAX_LCORE; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + + /* go ahead to separator '-',',' and ')' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == RTE_MAX_LCORE) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == ')')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + min = RTE_MAX_LCORE; + } else + return -1; + + str = end + 1; + } while (*end != '\0' && *end != ')'); + + return str - input; +} + +/* convert from set array to cpuset bitmap */ +static int +convert_to_cpuset(rte_cpuset_t *cpusetp, + uint16_t *set, unsigned num) +{ + unsigned idx; + + CPU_ZERO(cpusetp); + + for (idx = 0; idx < num; idx++) { + if (!set[idx]) + continue; + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "core %u " + "unavailable\n", idx); + return -1; + } + + CPU_SET(idx, cpusetp); + } + + return 0; +} + +/* + * The format pattern: --lcores='[<,lcores[@cpus]>...]' + * lcores, cpus could be a single digit/range or a group. + * '(' and ')' are necessary if it's a group. + * If not supply '@cpus', the value of cpus uses the same as lcores. + * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below + * lcore 0 runs on cpuset 0x41 (cpu 0,6) + * lcore 1 runs on cpuset 0x2 (cpu 1) + * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7) + * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2) + * lcore 6 runs on cpuset 0x41 (cpu 0,6) + * lcore 7 runs on cpuset 0x80 (cpu 7) + * lcore 8 runs on cpuset 0x100 (cpu 8) + */ +static int +eal_parse_lcores(const char *lcores) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + static uint16_t set[RTE_MAX_LCORE]; + unsigned idx = 0; + int i; + unsigned count = 0; + const char *lcore_start = NULL; + const char *end = NULL; + int offset; + rte_cpuset_t cpuset; + int lflags = 0; + int ret = -1; + + if (lcores == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*lcores)) + lcores++; + i = strlen(lcores); + while ((i > 0) && isblank(lcores[i - 1])) + i--; + + CPU_ZERO(&cpuset); + + /* Reset lcore config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + CPU_ZERO(&lcore_config[idx].cpuset); + } + + /* Get list of cores */ + do { + while (isblank(*lcores)) + lcores++; + if (*lcores == '\0') + goto err; + + /* record lcore_set start point */ + lcore_start = lcores; + + /* go across a complete bracket */ + if (*lcore_start == '(') { + lcores += strcspn(lcores, ")"); + if (*lcores++ == '\0') + goto err; + } + + /* scan the separator '@', ','(next) or '\0'(finish) */ + lcores += strcspn(lcores, "@,"); + + if (*lcores == '@') { + /* explicit assign cpu_set */ + offset = eal_parse_set(lcores + 1, set, RTE_DIM(set)); + if (offset < 0) + goto err; + + /* prepare cpu_set and update the end cursor */ + if (0 > convert_to_cpuset(&cpuset, + set, RTE_DIM(set))) + goto err; + end = lcores + 1 + offset; + } else { /* ',' or '\0' */ + /* haven't given cpu_set, current loop done */ + end = lcores; + + /* go back to check - */ + offset = strcspn(lcore_start, "(-"); + if (offset < (end - lcore_start) && + *(lcore_start + offset) != '(') + lflags = 1; + } + + if (*end != ',' && *end != '\0') + goto err; + + /* parse lcore_set from start point */ + if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set))) + goto err; + + /* without '@', by default using lcore_set as cpu_set */ + if (*lcores != '@' && + 0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set))) + goto err; + + /* start to update lcore_set */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (!set[idx]) + continue; + + if (cfg->lcore_role[idx] != ROLE_RTE) { + lcore_config[idx].core_index = count; + cfg->lcore_role[idx] = ROLE_RTE; + count++; + } + + if (lflags) { + CPU_ZERO(&cpuset); + CPU_SET(idx, &cpuset); + } + rte_memcpy(&lcore_config[idx].cpuset, &cpuset, + sizeof(rte_cpuset_t)); + } + + lcores = end + 1; + } while (*end != '\0'); + + if (count == 0) + goto err; + + cfg->lcore_count = count; + ret = 0; + +err: + + return ret; +} + +static int +eal_parse_syslog(const char *facility, struct internal_config *conf) +{ + int i; + static struct { + const char *name; + int value; + } map[] = { + { "auth", LOG_AUTH }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "news", LOG_NEWS }, + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER }, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { NULL, 0 } + }; + + for (i = 0; map[i].name; i++) { + if (!strcmp(facility, map[i].name)) { + conf->syslog_facility = map[i].value; + return 0; + } + } + return -1; +} + +static int +eal_parse_log_level(const char *level, uint32_t *log_level) +{ + char *end; + unsigned long tmp; + + errno = 0; + tmp = strtoul(level, &end, 0); + + /* check for errors */ + if ((errno != 0) || (level[0] == '\0') || + end == NULL || (*end != '\0')) + return -1; + + /* log_level is a uint32_t */ + if (tmp >= UINT32_MAX) + return -1; + + *log_level = tmp; + return 0; +} + +static enum rte_proc_type_t +eal_parse_proc_type(const char *arg) +{ + if (strncasecmp(arg, "primary", sizeof("primary")) == 0) + return RTE_PROC_PRIMARY; + if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0) + return RTE_PROC_SECONDARY; + if (strncasecmp(arg, "auto", sizeof("auto")) == 0) + return RTE_PROC_AUTO; + + return RTE_PROC_INVALID; +} + +int +eal_parse_common_option(int opt, const char *optarg, + struct internal_config *conf) +{ + switch (opt) { + /* blacklist */ + case 'b': + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* whitelist */ + case 'w': + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* coremask */ + case 'c': + if (eal_parse_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid coremask\n"); + return -1; + } + break; + /* corelist */ + case 'l': + if (eal_parse_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid core list\n"); + return -1; + } + break; + /* size of memory */ + case 'm': + conf->memory = atoi(optarg); + conf->memory *= 1024ULL; + conf->memory *= 1024ULL; + mem_parsed = 1; + break; + /* force number of channels */ + case 'n': + conf->force_nchannel = atoi(optarg); + if (conf->force_nchannel == 0) { + RTE_LOG(ERR, EAL, "invalid channel number\n"); + return -1; + } + break; + /* force number of ranks */ + case 'r': + conf->force_nrank = atoi(optarg); + if (conf->force_nrank == 0 || + conf->force_nrank > 16) { + RTE_LOG(ERR, EAL, "invalid rank number\n"); + return -1; + } + break; + /* force loading of external driver */ + case 'd': + if (eal_plugin_add(optarg) == -1) + return -1; + break; + case 'v': + /* since message is explicitly requested by user, we + * write message at highest log level so it can always + * be seen + * even if info or warning messages are disabled */ + RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version()); + break; + + /* long options */ + case OPT_HUGE_UNLINK_NUM: + conf->hugepage_unlink = 1; + break; + + case OPT_NO_HUGE_NUM: + conf->no_hugetlbfs = 1; + break; + + case OPT_NO_PCI_NUM: + conf->no_pci = 1; + break; + + case OPT_NO_HPET_NUM: + conf->no_hpet = 1; + break; + + case OPT_VMWARE_TSC_MAP_NUM: + conf->vmware_tsc_map = 1; + break; + + case OPT_NO_SHCONF_NUM: + conf->no_shconf = 1; + break; + + case OPT_PROC_TYPE_NUM: + conf->process_type = eal_parse_proc_type(optarg); + break; + + case OPT_MASTER_LCORE_NUM: + if (eal_parse_master_lcore(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_MASTER_LCORE "\n"); + return -1; + } + break; + + case OPT_VDEV_NUM: + if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL, + optarg) < 0) { + return -1; + } + break; + + case OPT_SYSLOG_NUM: + if (eal_parse_syslog(optarg, conf) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SYSLOG "\n"); + return -1; + } + break; + + case OPT_LOG_LEVEL_NUM: { + uint32_t log; + + if (eal_parse_log_level(optarg, &log) < 0) { + RTE_LOG(ERR, EAL, + "invalid parameters for --" + OPT_LOG_LEVEL "\n"); + return -1; + } + conf->log_level = log; + break; + } + case OPT_LCORES_NUM: + if (eal_parse_lcores(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_LCORES "\n"); + return -1; + } + break; + + /* don't know what to do, leave this to caller */ + default: + return 1; + + } + + return 0; +} + +int +eal_adjust_config(struct internal_config *internal_cfg) +{ + int i; + struct rte_config *cfg = rte_eal_get_configuration(); + + if (internal_config.process_type == RTE_PROC_AUTO) + internal_config.process_type = eal_proc_type_detect(); + + /* default master lcore is the first one */ + if (!master_lcore_parsed) + cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + + /* if no memory amounts were requested, this will result in 0 and + * will be overridden later, right after eal_hugepage_info_init() */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->memory += internal_cfg->socket_mem[i]; + + return 0; +} + +int +eal_check_common_options(struct internal_config *internal_cfg) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { + RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n"); + return -1; + } + + if (internal_cfg->process_type == RTE_PROC_INVALID) { + RTE_LOG(ERR, EAL, "Invalid process type specified\n"); + return -1; + } + if (index(internal_cfg->hugefile_prefix, '%') != NULL) { + RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" " + "option\n"); + return -1; + } + if (mem_parsed && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot " + "be specified at the same time\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 && + rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) { + RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " + "cannot be used at the same time\n"); + return -1; + } + + return 0; +} + +void +eal_common_usage(void) +{ + printf("[options]\n\n" + "EAL common options:\n" + " -c COREMASK Hexadecimal bitmask of cores to run on\n" + " -l CORELIST List of cores to run on\n" + " The argument format is [-c2][,c3[-c4],...]\n" + " where c1, c2, etc are core indexes between 0 and %d\n" + " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n" + " The argument format is\n" + " '[<,lcores[@cpus]>...]'\n" + " lcores and cpus list are grouped by '(' and ')'\n" + " Within the group, '-' is used for range separator,\n" + " ',' is used for single number separator.\n" + " '( )' can be omitted for single element group,\n" + " '@' can be omitted if cpus and lcores have the same value\n" + " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " -n CHANNELS Number of memory channels\n" + " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" + " -r RANKS Force number of memory ranks (don't detect)\n" + " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" + " Prevent EAL from using this PCI device. The argument\n" + " format is .\n" + " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n" + " Only use the specified PCI devices. The argument format\n" + " is <[domain:]bus:devid.func>. This option can be present\n" + " several times (once per device).\n" + " [NOTE: PCI whitelist cannot be used with -b option]\n" + " --"OPT_VDEV" Add a virtual device.\n" + " The argument format is [,key=val,...]\n" + " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " -d LIB.so|DIR Add a driver or driver directory\n" + " (can be used multiple times)\n" + " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" + " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" + " --"OPT_SYSLOG" Set syslog facility\n" + " --"OPT_LOG_LEVEL" Set default log level\n" + " -v Display version information on startup\n" + " -h, --help This help\n" + "\nEAL options for DEBUG use only:\n" + " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" + " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" + " --"OPT_NO_PCI" Disable PCI\n" + " --"OPT_NO_HPET" Disable HPET\n" + " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n" + "\n", RTE_MAX_LCORE); +} diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c new file mode 100644 index 00000000..40f49229 --- /dev/null +++ b/lib/librte_eal/common/eal_common_pci.c @@ -0,0 +1,457 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +struct pci_driver_list pci_driver_list; +struct pci_device_list pci_device_list; + +static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && + devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) + continue; + if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr)) + return devargs; + } + return NULL; +} + +/* map a particular resource from a file */ +void * +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) +{ + void *mapaddr; + + /* Map the PCI memory resource of device */ + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, + MAP_SHARED | additional_flags, fd, offset); + if (mapaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", + __func__, fd, requested_addr, + (unsigned long)size, (unsigned long)offset, + strerror(errno), mapaddr); + } else + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); + + return mapaddr; +} + +/* unmap a particular resource */ +void +pci_unmap_resource(void *requested_addr, size_t size) +{ + if (requested_addr == NULL) + return; + + /* Unmap the PCI memory resource of device */ + if (munmap(requested_addr, size)) { + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", + __func__, requested_addr, (unsigned long)size, + strerror(errno)); + } else + RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", + requested_addr); +} + +/* + * If vendor/device ID match, call the devinit() function of the + * driver. + */ +static int +rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) +{ + int ret; + const struct rte_pci_id *id_table; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->numa_node); + + RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + /* no initialization when blacklisted, return without error */ + if (dev->devargs != NULL && + dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); + return 1; + } + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { + /* map resources for devices that use igb_uio */ + ret = rte_eal_pci_map_device(dev); + if (ret != 0) + return ret; + } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && + rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* unbind current driver */ + if (pci_unbind_kernel_driver(dev) < 0) + return -1; + } + + /* reference driver structure */ + dev->driver = dr; + + /* call the driver devinit() function */ + return dr->devinit(dr, dev); + } + /* return positive value if driver doesn't support this device */ + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of the + * driver. + */ +static int +rte_eal_pci_detach_dev(struct rte_pci_driver *dr, + struct rte_pci_device *dev) +{ + const struct rte_pci_id *id_table; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + if (dr->devuninit && (dr->devuninit(dev) < 0)) + return -1; /* negative value is an error */ + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_eal_pci_unmap_device(dev); + + return 0; + } + + /* return positive value if driver doesn't support this device */ + return 1; +} + +/* + * If vendor/device ID match, call the devinit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_probe_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_detach_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_detach_dev(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * Find the pci device specified by pci address, then invoke probe function of + * the driver of the devive. + */ +int +rte_eal_pci_probe_one(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_probe_all_drivers(dev); + if (ret < 0) + goto err_return; + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Detach device specified by its pci address. + */ +int +rte_eal_pci_detach(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_detach_all_drivers(dev); + if (ret < 0) + goto err_return; + + TAILQ_REMOVE(&pci_device_list, dev, next); + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Scan the content of the PCI bus, and call the devinit() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +int +rte_eal_pci_probe(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_devargs *devargs; + int probe_all = 0; + int ret = 0; + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) + probe_all = 1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + + /* set devargs in PCI structure */ + devargs = pci_devargs_lookup(dev); + if (devargs != NULL) + dev->devargs = devargs; + + /* probe all or only whitelisted devices */ + if (probe_all) + ret = pci_probe_all_drivers(dev); + else if (devargs != NULL && + devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + ret = pci_probe_all_drivers(dev); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + } + + return 0; +} + +/* dump one device */ +static int +pci_dump_one_device(FILE *f, struct rte_pci_device *dev) +{ + int i; + + fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, + dev->id.device_id); + + for (i = 0; i != sizeof(dev->mem_resource) / + sizeof(dev->mem_resource[0]); i++) { + fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", + dev->mem_resource[i].phys_addr, + dev->mem_resource[i].len); + } + return 0; +} + +/* dump devices on the bus */ +void +rte_eal_pci_dump(FILE *f) +{ + struct rte_pci_device *dev = NULL; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + pci_dump_one_device(f, dev); + } +} + +/* register a driver */ +void +rte_eal_pci_register(struct rte_pci_driver *driver) +{ + TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&pci_driver_list, driver, next); +} diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c new file mode 100644 index 00000000..f062e81d --- /dev/null +++ b/lib/librte_eal/common/eal_common_pci_uio.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "eal_private.h" + +static struct rte_tailq_elem rte_uio_tailq = { + .name = "UIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_uio_tailq) + +static int +pci_uio_map_secondary(struct rte_pci_device *dev) +{ + int fd, i; + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + continue; + + for (i = 0; i != uio_res->nb_maps; i++) { + /* + * open devname, to mmap it + */ + fd = open(uio_res->maps[i].path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uio_res->maps[i].path, strerror(errno)); + return -1; + } + + void *mapaddr = pci_map_resource(uio_res->maps[i].addr, + fd, (off_t)uio_res->maps[i].offset, + (size_t)uio_res->maps[i].size, 0); + /* fd is not needed in slave process, close it */ + close(fd); + if (mapaddr != uio_res->maps[i].addr) { + RTE_LOG(ERR, EAL, + "Cannot mmap device resource file %s to address: %p\n", + uio_res->maps[i].path, + uio_res->maps[i].addr); + return -1; + } + } + return 0; + } + + RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); + return 1; +} + +/* map the PCI resource of a PCI device in virtual memory */ +int +pci_uio_map_resource(struct rte_pci_device *dev) +{ + int i, map_idx = 0, ret; + uint64_t phaddr; + struct mapped_pci_resource *uio_res = NULL; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + dev->intr_handle.fd = -1; + dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* secondary processes - use already recorded details */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_map_secondary(dev); + + /* allocate uio resource */ + ret = pci_uio_alloc_resource(dev, &uio_res); + if (ret) + return ret; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + + ret = pci_uio_map_resource_by_index(dev, i, + uio_res, map_idx); + if (ret) + goto error; + + map_idx++; + } + + uio_res->nb_maps = map_idx; + + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); + + return 0; +error: + for (i = 0; i < map_idx; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } + pci_uio_free_resource(dev, uio_res); + return -1; +} + +static void +pci_uio_unmap(struct mapped_pci_resource *uio_res) +{ + int i; + + if (uio_res == NULL) + return; + + for (i = 0; i != uio_res->nb_maps; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } +} + +static struct mapped_pci_resource * +pci_uio_find_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return NULL; + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + return uio_res; + } + return NULL; +} + +/* unmap the PCI resource of a PCI device in virtual memory */ +void +pci_uio_unmap_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return; + + /* find an entry for the device */ + uio_res = pci_uio_find_resource(dev); + if (uio_res == NULL) + return; + + /* secondary processes - just free maps */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_unmap(uio_res); + + TAILQ_REMOVE(uio_res_list, uio_res, next); + + /* unmap all resources */ + pci_uio_unmap(uio_res); + + /* free uio resource */ + rte_free(uio_res); + + /* close fd if in primary process */ + close(dev->intr_handle.fd); + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; +} diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c new file mode 100644 index 00000000..12e0fcac --- /dev/null +++ b/lib/librte_eal/common/eal_common_proc.c @@ -0,0 +1,61 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_internal_cfg.h" + +int +rte_eal_primary_proc_alive(const char *config_file_path) +{ + int config_fd; + + if (config_file_path) + config_fd = open(config_file_path, O_RDONLY); + else { + char default_path[PATH_MAX+1]; + snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT, + default_config_dir, "rte"); + config_fd = open(default_path, O_RDONLY); + } + if (config_fd < 0) + return 0; + + int ret = lockf(config_fd, F_TEST, 0); + close(config_fd); + + return !!ret; +} diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c new file mode 100644 index 00000000..125a3e2d --- /dev/null +++ b/lib/librte_eal/common/eal_common_string_fns.c @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include + +/* split string into tokens */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim) +{ + int i, tok = 0; + int tokstart = 1; /* first token is right at start of string */ + + if (string == NULL || tokens == NULL) + goto einval_error; + + for (i = 0; i < stringlen; i++) { + if (string[i] == '\0' || tok >= maxtokens) + break; + if (tokstart) { + tokstart = 0; + tokens[tok++] = &string[i]; + } + if (string[i] == delim) { + string[i] = '\0'; + tokstart = 1; + } + } + return tok; + +einval_error: + errno = EINVAL; + return -1; +} diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c new file mode 100644 index 00000000..bb08ec8b --- /dev/null +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -0,0 +1,202 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem); +/* local tailq list */ +static struct rte_tailq_elem_head rte_tailq_elem_head = + TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head); + +/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */ +static int rte_tailqs_count = -1; + +struct rte_tailq_head * +rte_eal_tailq_lookup(const char *name) +{ + unsigned i; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (name == NULL) + return NULL; + + for (i = 0; i < RTE_MAX_TAILQ; i++) { + if (!strncmp(name, mcfg->tailq_head[i].name, + RTE_TAILQ_NAMESIZE-1)) + return &mcfg->tailq_head[i]; + } + + return NULL; +} + +void +rte_dump_tailq(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->qlock); + for (i = 0; i < RTE_MAX_TAILQ; i++) { + const struct rte_tailq_head *tailq = &mcfg->tailq_head[i]; + const struct rte_tailq_entry_head *head = &tailq->tailq_head; + + fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n", + i, tailq->name, head->tqh_first, head->tqh_last); + } + rte_rwlock_read_unlock(&mcfg->qlock); +} + +static struct rte_tailq_head * +rte_eal_tailq_create(const char *name) +{ + struct rte_tailq_head *head = NULL; + + if (!rte_eal_tailq_lookup(name) && + (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) { + struct rte_mem_config *mcfg; + + mcfg = rte_eal_get_configuration()->mem_config; + head = &mcfg->tailq_head[rte_tailqs_count]; + snprintf(head->name, sizeof(head->name) - 1, "%s", name); + TAILQ_INIT(&head->tailq_head); + rte_tailqs_count++; + } + + return head; +} + +/* local register, used to store "early" tailqs before rte_eal_init() and to + * ensure secondary process only registers tailqs once. */ +static int +rte_eal_tailq_local_register(struct rte_tailq_elem *t) +{ + struct rte_tailq_elem *temp; + + TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) { + if (!strncmp(t->name, temp->name, sizeof(temp->name))) + return -1; + } + + TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next); + return 0; +} + +static void +rte_eal_tailq_update(struct rte_tailq_elem *t) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* primary process is the only one that creates */ + t->head = rte_eal_tailq_create(t->name); + } else { + t->head = rte_eal_tailq_lookup(t->name); + } +} + +int +rte_eal_tailq_register(struct rte_tailq_elem *t) +{ + if (rte_eal_tailq_local_register(t) < 0) { + RTE_LOG(ERR, EAL, + "%s tailq is already registered\n", t->name); + goto error; + } + + /* if a register happens after rte_eal_tailqs_init(), then we can update + * tailq head */ + if (rte_tailqs_count >= 0) { + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + TAILQ_REMOVE(&rte_tailq_elem_head, t, next); + goto error; + } + } + + return 0; + +error: + t->head = NULL; + return -1; +} + +int +rte_eal_tailqs_init(void) +{ + struct rte_tailq_elem *t; + + rte_tailqs_count = 0; + + TAILQ_FOREACH(t, &rte_tailq_elem_head, next) { + /* second part of register job for "early" tailqs, see + * rte_eal_tailq_register and EAL_REGISTER_TAILQ */ + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + /* no need to TAILQ_REMOVE, we are going to panic in + * rte_eal_init() */ + goto fail; + } + } + + return 0; + +fail: + rte_dump_tailq(stderr); + return -1; +} diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c new file mode 100644 index 00000000..2405e93f --- /dev/null +++ b/lib/librte_eal/common/eal_common_thread.c @@ -0,0 +1,157 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "eal_thread.h" + +RTE_DECLARE_PER_LCORE(unsigned , _socket_id); + +unsigned rte_socket_id(void) +{ + return RTE_PER_LCORE(_socket_id); +} + +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) +{ + unsigned cpu = 0; + int socket_id = SOCKET_ID_ANY; + int sid; + + if (cpusetp == NULL) + return SOCKET_ID_ANY; + + do { + if (!CPU_ISSET(cpu, cpusetp)) + continue; + + if (socket_id == SOCKET_ID_ANY) + socket_id = eal_cpu_socket_id(cpu); + + sid = eal_cpu_socket_id(cpu); + if (socket_id != sid) { + socket_id = SOCKET_ID_ANY; + break; + } + + } while (++cpu < RTE_MAX_LCORE); + + return socket_id; +} + +int +rte_thread_set_affinity(rte_cpuset_t *cpusetp) +{ + int s; + unsigned lcore_id; + pthread_t tid; + + tid = pthread_self(); + + s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp); + if (s != 0) { + RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n"); + return -1; + } + + /* store socket_id in TLS for quick access */ + RTE_PER_LCORE(_socket_id) = + eal_cpuset_socket_id(cpusetp); + + /* store cpuset in TLS for quick access */ + memmove(&RTE_PER_LCORE(_cpuset), cpusetp, + sizeof(rte_cpuset_t)); + + lcore_id = rte_lcore_id(); + if (lcore_id != (unsigned)LCORE_ID_ANY) { + /* EAL thread will update lcore_config */ + lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id); + memmove(&lcore_config[lcore_id].cpuset, cpusetp, + sizeof(rte_cpuset_t)); + } + + return 0; +} + +void +rte_thread_get_affinity(rte_cpuset_t *cpusetp) +{ + assert(cpusetp); + memmove(cpusetp, &RTE_PER_LCORE(_cpuset), + sizeof(rte_cpuset_t)); +} + +int +eal_thread_dump_affinity(char *str, unsigned size) +{ + rte_cpuset_t cpuset; + unsigned cpu; + int ret; + unsigned int out = 0; + + rte_thread_get_affinity(&cpuset); + + for (cpu = 0; cpu < RTE_MAX_LCORE; cpu++) { + if (!CPU_ISSET(cpu, &cpuset)) + continue; + + ret = snprintf(str + out, + size - out, "%u,", cpu); + if (ret < 0 || (unsigned)ret >= size - out) { + /* string will be truncated */ + ret = -1; + goto exit; + } + + out += ret; + } + + ret = 0; +exit: + /* remove the last separator */ + if (out > 0) + str[out - 1] = '\0'; + + return ret; +} diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c new file mode 100644 index 00000000..c4227cd8 --- /dev/null +++ b/lib/librte_eal/common/eal_common_timer.c @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "eal_private.h" + +/* The frequency of the RDTSC timer resolution */ +static uint64_t eal_tsc_resolution_hz; + +void +rte_delay_us(unsigned us) +{ + const uint64_t start = rte_get_timer_cycles(); + const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; + while ((rte_get_timer_cycles() - start) < ticks) + rte_pause(); +} + +uint64_t +rte_get_tsc_hz(void) +{ + return eal_tsc_resolution_hz; +} + +static uint64_t +estimate_tsc_freq(void) +{ + RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly" + " - clock timings may be less accurate.\n"); + /* assume that the sleep(1) will sleep for 1 second */ + uint64_t start = rte_rdtsc(); + sleep(1); + return rte_rdtsc() - start; +} + +void +set_tsc_freq(void) +{ + uint64_t freq = get_tsc_freq(); + + if (!freq) + freq = estimate_tsc_freq(); + + RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); + eal_tsc_resolution_hz = freq; +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h new file mode 100644 index 00000000..fdb4a70b --- /dev/null +++ b/lib/librte_eal/common/eal_filesystem.h @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Stores functions and path defines for files and directories + * on the filesystem for Linux, that are used by the Linux EAL. + */ + +#ifndef EAL_FILESYSTEM_H +#define EAL_FILESYSTEM_H + +/** Path of rte config file. */ +#define RUNTIME_CONFIG_FMT "%s/.%s_config" + +#include +#include +#include +#include + +#include +#include "eal_internal_cfg.h" + +static const char *default_config_dir = "/var/run"; + +static inline const char * +eal_runtime_config_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory, + internal_config.hugefile_prefix); + return buffer; +} + +/** Path of hugepage info file. */ +#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info" + +static inline const char * +eal_hugepage_info_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory, + internal_config.hugefile_prefix); + return buffer; +} + +/** String format for hugepage map files. */ +#define HUGEFILE_FMT "%s/%smap_%d" +#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" + +static inline const char * +eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + snprintf(buffer, buflen, HUGEFILE_FMT, hugedir, + internal_config.hugefile_prefix, f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS +static inline const char * +eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, + internal_config.hugefile_prefix, f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} +#endif + +/** define the default filename prefix for the %s values above */ +#define HUGEFILE_PREFIX_DEFAULT "rte" + +/** Function to read a single numeric value from a file on the filesystem. + * Used to read information from files on /sys */ +int eal_parse_sysfs_value(const char *filename, unsigned long *val); + +#endif /* EAL_FILESYSTEM_H */ diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h new file mode 100644 index 00000000..38edac03 --- /dev/null +++ b/lib/librte_eal/common/eal_hugepages.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_HUGEPAGES_H +#define EAL_HUGEPAGES_H + +#include +#include +#include + +#define MAX_HUGEPAGE_PATH PATH_MAX + +/** + * Structure used to store informations about hugepages that we mapped + * through the files in hugetlbfs. + */ +struct hugepage_file { + void *orig_va; /**< virtual addr of first mmap() */ + void *final_va; /**< virtual addr of 2nd mmap() */ + uint64_t physaddr; /**< physical addr */ + size_t size; /**< the page size */ + int socket_id; /**< NUMA socket ID */ + int file_id; /**< the '%d' in HUGEFILE_FMT */ + int memseg_id; /**< the memory segment to which page belongs */ +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int repeated; /**< number of times the page size is repeated */ +#endif + char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ +}; + +/** + * Read the information from linux on what hugepages are available + * for the EAL to use + */ +int eal_hugepage_info_init(void); + +#endif /* EAL_HUGEPAGES_H */ diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h new file mode 100644 index 00000000..5f1367eb --- /dev/null +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Holds the structures for the eal internal configuration + */ + +#ifndef EAL_INTERNAL_CFG_H +#define EAL_INTERNAL_CFG_H + +#include +#include + +#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */ + +/* + * internal configuration structure for the number, size and + * mount points of hugepages + */ +struct hugepage_info { + uint64_t hugepage_sz; /**< size of a huge page */ + const char *hugedir; /**< dir where hugetlbfs is mounted */ + uint32_t num_pages[RTE_MAX_NUMA_NODES]; + /**< number of hugepages of that size on each socket */ + int lock_descriptor; /**< file descriptor for hugepage dir */ +}; + +/** + * internal configuration + */ +struct internal_config { + volatile size_t memory; /**< amount of asked memory */ + volatile unsigned force_nchannel; /**< force number of channels */ + volatile unsigned force_nrank; /**< force number of ranks */ + volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + unsigned hugepage_unlink; /**< true to unlink backing files */ + volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ + volatile unsigned no_pci; /**< true to disable PCI */ + volatile unsigned no_hpet; /**< true to disable HPET */ + volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping + * instead of native TSC */ + volatile unsigned no_shconf; /**< true if there is no shared config */ + volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */ + volatile enum rte_proc_type_t process_type; /**< multi-process proc type */ + /** true to try allocating memory on specific sockets */ + volatile unsigned force_sockets; + volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ + volatile int syslog_facility; /**< facility passed to openlog() */ + volatile uint32_t log_level; /**< default log level */ + /** default interrupt mode for VFIO */ + volatile enum rte_intr_mode vfio_intr_mode; + const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ + const char *hugepage_dir; /**< specific hugetlbfs directory to use */ + + unsigned num_hugepage_sizes; /**< how many sizes on this system */ + struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; +}; +extern struct internal_config internal_config; /**< Global EAL configuration. */ + +void eal_reset_internal_config(struct internal_config *internal_cfg); + +#endif /* EAL_INTERNAL_CFG_H */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h new file mode 100644 index 00000000..a881c62e --- /dev/null +++ b/lib/librte_eal/common/eal_options.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_OPTIONS_H +#define EAL_OPTIONS_H + +enum { + /* long options mapped to a short option */ +#define OPT_HELP "help" + OPT_HELP_NUM = 'h', +#define OPT_PCI_BLACKLIST "pci-blacklist" + OPT_PCI_BLACKLIST_NUM = 'b', +#define OPT_PCI_WHITELIST "pci-whitelist" + OPT_PCI_WHITELIST_NUM = 'w', + + /* first long only option value must be >= 256, so that we won't + * conflict with short options */ + OPT_LONG_MIN_NUM = 256, +#define OPT_BASE_VIRTADDR "base-virtaddr" + OPT_BASE_VIRTADDR_NUM, +#define OPT_CREATE_UIO_DEV "create-uio-dev" + OPT_CREATE_UIO_DEV_NUM, +#define OPT_FILE_PREFIX "file-prefix" + OPT_FILE_PREFIX_NUM, +#define OPT_HUGE_DIR "huge-dir" + OPT_HUGE_DIR_NUM, +#define OPT_HUGE_UNLINK "huge-unlink" + OPT_HUGE_UNLINK_NUM, +#define OPT_LCORES "lcores" + OPT_LCORES_NUM, +#define OPT_LOG_LEVEL "log-level" + OPT_LOG_LEVEL_NUM, +#define OPT_MASTER_LCORE "master-lcore" + OPT_MASTER_LCORE_NUM, +#define OPT_PROC_TYPE "proc-type" + OPT_PROC_TYPE_NUM, +#define OPT_NO_HPET "no-hpet" + OPT_NO_HPET_NUM, +#define OPT_NO_HUGE "no-huge" + OPT_NO_HUGE_NUM, +#define OPT_NO_PCI "no-pci" + OPT_NO_PCI_NUM, +#define OPT_NO_SHCONF "no-shconf" + OPT_NO_SHCONF_NUM, +#define OPT_SOCKET_MEM "socket-mem" + OPT_SOCKET_MEM_NUM, +#define OPT_SYSLOG "syslog" + OPT_SYSLOG_NUM, +#define OPT_VDEV "vdev" + OPT_VDEV_NUM, +#define OPT_VFIO_INTR "vfio-intr" + OPT_VFIO_INTR_NUM, +#define OPT_VMWARE_TSC_MAP "vmware-tsc-map" + OPT_VMWARE_TSC_MAP_NUM, +#define OPT_XEN_DOM0 "xen-dom0" + OPT_XEN_DOM0_NUM, + OPT_LONG_MAX_NUM +}; + +extern const char eal_short_options[]; +extern const struct option eal_long_options[]; + +int eal_parse_common_option(int opt, const char *argv, + struct internal_config *conf); +int eal_adjust_config(struct internal_config *internal_cfg); +int eal_check_common_options(struct internal_config *internal_cfg); +void eal_common_usage(void); +enum rte_proc_type_t eal_proc_type_detect(void); +int eal_plugins_init(void); + +#endif /* EAL_OPTIONS_H */ diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h new file mode 100644 index 00000000..2342fa16 --- /dev/null +++ b/lib/librte_eal/common/eal_private.h @@ -0,0 +1,331 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EAL_PRIVATE_H_ +#define _EAL_PRIVATE_H_ + +#include +#include + +/** + * Initialize the memzone subsystem (private to eal). + * + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_memzone_init(void); + +/** + * Common log initialization function (private to eal). + * + * Called by environment-specific log initialization function to initialize + * log history. + * + * @param default_log + * The default log stream to be used. + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_common_log_init(FILE *default_log); + +/** + * Fill configuration with number of physical and logical processors + * + * This function is private to EAL. + * + * Parse /proc/cpuinfo to get the number of physical and logical + * processors on the machine. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_cpu_init(void); + +/** + * Map memory + * + * This function is private to EAL. + * + * Fill configuration structure with these infos, and return 0 on success. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memory_init(void); + +/** + * Configure timers + * + * This function is private to EAL. + * + * Mmap memory areas used by HPET (high precision event timer) that will + * provide our time reference, and configure the TSC frequency also for it + * to be used as a reference. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_timer_init(void); + +/** + * Init early logs + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_early_init(void); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_init(const char *id, int facility); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_init(void); + +#ifdef RTE_LIBRTE_IVSHMEM +/** + * Init the memory from IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_init(void); + +/** + * Init objects in IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_obj_init(void); +#endif + +struct rte_pci_driver; +struct rte_pci_device; + +/** + * Unbind kernel driver for this device + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_unbind_kernel_driver(struct rte_pci_device *dev); + +/** + * Map the PCI resource of a PCI device in virtual memory + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource(struct rte_pci_device *dev); + +/** + * Unmap the PCI resource of a PCI device + * + * This function is private to EAL. + */ +void pci_uio_unmap_resource(struct rte_pci_device *dev); + +/** + * Allocate uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to allocate uio resource + * @param uio_res + * Pointer to uio resource. + * If the function returns 0, the pointer will be filled. + * @return + * 0 on success, negative on error + */ +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); + +/** + * Free uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to free uio resource + * @param uio_res + * Pointer to uio resource. + */ +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); + +/** + * Map device memory to uio resource + * + * This function is private to EAL. + * + * @param dev + * PCI device that has memory information. + * @param res_idx + * Memory resource index of the PCI device. + * @param uio_res + * uio resource that will keep mapping information. + * @param map_idx + * Mapping information index of the uio resource. + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +/** + * Init tail queues for non-EAL library structures. This is to allow + * the rings, mempools, etc. lists to be shared among multiple processes + * + * This function is private to EAL + * + * @return + * 0 on success, negative on error + */ +int rte_eal_tailqs_init(void); + +/** + * Init interrupt handling. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_intr_init(void); + +/** + * Init alarm mechanism. This is to allow a callback be called after + * specific time. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_alarm_init(void); + +/** + * This function initialises any virtual devices + * + * This function is private to the EAL. + */ +int rte_eal_dev_init(void); + +/** + * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, + * etc.) loaded. + * + * @param module_name + * The module's name which need to be checked + * + * @return + * -1 means some error happens(NULL pointer or open failure) + * 0 means the module not loaded + * 1 means the module loaded + */ +int rte_eal_check_module(const char *module_name); + +/** + * Get cpu core_id. + * + * This function is private to the EAL. + */ +unsigned eal_cpu_core_id(unsigned lcore_id); + +/** + * Check if cpu is present. + * + * This function is private to the EAL. + */ +int eal_cpu_detected(unsigned lcore_id); + +/** + * Set TSC frequency from precise value or estimation + * + * This function is private to the EAL. + */ +void set_tsc_freq(void); + +/** + * Get precise TSC frequency from system + * + * This function is private to the EAL. + */ +uint64_t get_tsc_freq(void); + +/** + * Prepare physical memory mapping + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_init(void); + +/** + * Creates memory mapping in secondary process + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_attach(void); + +#endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h new file mode 100644 index 00000000..e4e76b9d --- /dev/null +++ b/lib/librte_eal/common/eal_thread.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_THREAD_H +#define EAL_THREAD_H + +#include + +/** + * basic loop of thread, called for each thread by eal_init(). + * + * @param arg + * opaque pointer + */ +__attribute__((noreturn)) void *eal_thread_loop(void *arg); + +/** + * Init per-lcore info for master thread + * + * @param lcore_id + * identifier of master lcore + */ +void eal_thread_init_master(unsigned lcore_id); + +/** + * Get the NUMA socket id from cpu id. + * This function is private to EAL. + * + * @param cpu_id + * The logical process id. + * @return + * socket_id or SOCKET_ID_ANY + */ +unsigned eal_cpu_socket_id(unsigned cpu_id); + +/** + * Get the NUMA socket id from cpuset. + * This function is private to EAL. + * + * @param cpusetp + * The point to a valid cpu set. + * @return + * socket_id or SOCKET_ID_ANY + */ +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp); + +/** + * Default buffer size to use with eal_thread_dump_affinity() + */ +#define RTE_CPU_AFFINITY_STR_LEN 256 + +/** + * Dump the current pthread cpuset. + * This function is private to EAL. + * + * Note: + * If the dump size is greater than the size of given buffer, + * the string will be truncated and with '\0' at the end. + * + * @param str + * The string buffer the cpuset will dump to. + * @param size + * The string buffer size. + * @return + * 0 for success, -1 if truncation happens. + */ +int +eal_thread_dump_affinity(char *str, unsigned size); + +#endif /* EAL_THREAD_H */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h new file mode 100644 index 00000000..454a12b0 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_ARM_H_ +#define _RTE_ATOMIC_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_wmb() + +#define rte_smp_rmb() rte_rmb() + +#endif /* _RTE_ATOMIC_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h new file mode 100644 index 00000000..9ae1e78b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_ARM32_H_ +#define _RTE_ATOMIC_ARM32_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + */ +#define rte_mb() __sync_synchronize() + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + */ +#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0) + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + */ +#define rte_rmb() __sync_synchronize() + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h new file mode 100644 index 00000000..671caa76 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -0,0 +1,88 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_ATOMIC_ARM64_H_ +#define _RTE_ATOMIC_ARM64_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0) + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void) +{ + dmb(ish); +} + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void) +{ + dmb(ishst); +} + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void) +{ + dmb(ishld); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h new file mode 100644 index 00000000..3f2dd1f2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h @@ -0,0 +1,107 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_ARM_H_ +#define _RTE_BYTEORDER_ARM_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +/* fix missing __builtin_bswap16 for gcc older then 4.8 */ +#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + register uint16_t x = _x; + + asm volatile ("rev16 %0,%1" + : "=r" (x) + : "r" (x) + ); + return x; +} + +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif + +/* ARM architecture is bi-endian (both big and little). */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h new file mode 100644 index 00000000..b8f62889 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM_H_ +#define _RTE_CPUFLAGS_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_CPUFLAGS_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h new file mode 100644 index 00000000..eb02d9b9 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h @@ -0,0 +1,82 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM32_H_ +#define _RTE_CPUFLAGS_ARM32_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_SWP = 0, + RTE_CPUFLAG_HALF, + RTE_CPUFLAG_THUMB, + RTE_CPUFLAG_A26BIT, + RTE_CPUFLAG_FAST_MULT, + RTE_CPUFLAG_FPA, + RTE_CPUFLAG_VFP, + RTE_CPUFLAG_EDSP, + RTE_CPUFLAG_JAVA, + RTE_CPUFLAG_IWMMXT, + RTE_CPUFLAG_CRUNCH, + RTE_CPUFLAG_THUMBEE, + RTE_CPUFLAG_NEON, + RTE_CPUFLAG_VFPv3, + RTE_CPUFLAG_VFPv3D16, + RTE_CPUFLAG_TLS, + RTE_CPUFLAG_VFPv4, + RTE_CPUFLAG_IDIVA, + RTE_CPUFLAG_IDIVT, + RTE_CPUFLAG_VFPD32, + RTE_CPUFLAG_LPAE, + RTE_CPUFLAG_EVTSTRM, + RTE_CPUFLAG_AES, + RTE_CPUFLAG_PMULL, + RTE_CPUFLAG_SHA1, + RTE_CPUFLAG_SHA2, + RTE_CPUFLAG_CRC32, + RTE_CPUFLAG_V7L, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h new file mode 100644 index 00000000..49aead92 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h @@ -0,0 +1,64 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM64_H_ +#define _RTE_CPUFLAGS_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_FP = 0, + RTE_CPUFLAG_NEON, + RTE_CPUFLAG_EVTSTRM, + RTE_CPUFLAG_AES, + RTE_CPUFLAG_PMULL, + RTE_CPUFLAG_SHA1, + RTE_CPUFLAG_SHA2, + RTE_CPUFLAG_CRC32, + RTE_CPUFLAG_ATOMICS, + RTE_CPUFLAG_AARCH64, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h new file mode 100644 index 00000000..a8009a06 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_ARM_H_ +#define _RTE_CYCLES_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_CYCLES_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h new file mode 100644 index 00000000..9c1be71e --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h @@ -0,0 +1,121 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_ARM32_H_ +#define _RTE_CYCLES_ARM32_H_ + +/* ARM v7 does not have suitable source of clock signals. The only clock counter + available in the core is 32 bit wide. Therefore it is unsuitable as the + counter overlaps every few seconds and probably is not accessible by + userspace programs. Therefore we use clock_gettime(CLOCK_MONOTONIC_RAW) to + simulate counter running at 1GHz. +*/ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +#ifndef RTE_ARM_EAL_RDTSC_USE_PMU + +/** + * This call is easily portable to any ARM architecture, however, + * it may be damn slow and inprecise for some tasks. + */ +static inline uint64_t +__rte_rdtsc_syscall(void) +{ + struct timespec val; + uint64_t v; + + while (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) + /* no body */; + + v = (uint64_t) val.tv_sec * 1000000000LL; + v += (uint64_t) val.tv_nsec; + return v; +} +#define rte_rdtsc __rte_rdtsc_syscall + +#else + +/** + * This function requires to configure the PMCCNTR and enable + * userspace access to it: + * + * asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r"(1)); + * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29)); + * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f)); + * + * which is possible only from the priviledged mode (kernel space). + */ +static inline uint64_t +__rte_rdtsc_pmccntr(void) +{ + unsigned tsc; + uint64_t final_tsc; + + /* Read PMCCNTR */ + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(tsc)); + /* 1 tick = 64 clocks */ + final_tsc = ((uint64_t)tsc) << 6; + + return (uint64_t)final_tsc; +} +#define rte_rdtsc __rte_rdtsc_pmccntr + +#endif /* RTE_ARM_EAL_RDTSC_USE_PMU */ + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h new file mode 100644 index 00000000..14f26120 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h @@ -0,0 +1,71 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_ARM64_H_ +#define _RTE_CYCLES_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + uint64_t tsc; + + asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); + return tsc; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h new file mode 100644 index 00000000..1d562c3f --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_ARM_H_ +#define _RTE_MEMCPY_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_MEMCPY_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h new file mode 100644 index 00000000..988125b3 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -0,0 +1,338 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_ARM32_H_ +#define _RTE_MEMCPY_ARM32_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcpy.h" + +#ifdef RTE_ARCH_ARM_NEON_MEMCPY + +#ifndef RTE_MACHINE_CPUFLAG_NEON +#error "Cannot optimize memcpy by NEON as the CPU seems to not support this" +#endif + +/* ARM NEON Intrinsics are used to copy data */ +#include + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + vst1q_u8(dst, vld1q_u8(src)); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : : "memory", "d0", "d1", "d2", "d3"); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d5}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d5}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5"); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + asm volatile ("pld [%0, #64]" : : "r" (src)); + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]!\n\t" + "vld1.8 {d8-d11}, [%0]!\n\t" + "vld1.8 {d12-d15}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]!\n\t" + "vst1.8 {d8-d11}, [%1]!\n\t" + "vst1.8 {d12-d15}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15"); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + asm volatile ("pld [%0, #64]" : : "r" (src)); + asm volatile ("pld [%0, #128]" : : "r" (src)); + asm volatile ("pld [%0, #192]" : : "r" (src)); + asm volatile ("pld [%0, #256]" : : "r" (src)); + asm volatile ("pld [%0, #320]" : : "r" (src)); + asm volatile ("pld [%0, #384]" : : "r" (src)); + asm volatile ("pld [%0, #448]" : : "r" (src)); + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]!\n\t" + "vld1.8 {d8-d11}, [%0]!\n\t" + "vld1.8 {d12-d15}, [%0]!\n\t" + "vld1.8 {d16-d19}, [%0]!\n\t" + "vld1.8 {d20-d23}, [%0]!\n\t" + "vld1.8 {d24-d27}, [%0]!\n\t" + "vld1.8 {d28-d31}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]!\n\t" + "vst1.8 {d8-d11}, [%1]!\n\t" + "vst1.8 {d12-d15}, [%1]!\n\t" + "vst1.8 {d16-d19}, [%1]!\n\t" + "vst1.8 {d20-d23}, [%1]!\n\t" + "vst1.8 {d24-d27}, [%1]!\n\t" + "vst1.8 {d28-d31}, [%1]!\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); +} + +#define rte_memcpy(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcpy((dst), (src), (n)) : \ + rte_memcpy_func((dst), (src), (n)); }) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* We can't copy < 16 bytes using XMM registers so do it manually. */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + dst = (uint8_t *)dst + 1; + src = (const uint8_t *)src + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + dst = (uint16_t *)dst + 1; + src = (const uint16_t *)src + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + } + if (n & 0x08) { + /* ARMv7 can not handle unaligned access to long long + * (uint64_t). Therefore two uint32_t operations are + * used. + */ + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + *(uint32_t *)dst = *(const uint32_t *)src; + } + return ret; + } + + /* Special fast cases for <= 128 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + + if (n <= 128) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /* + * For large copies > 128 bytes. This combination of 256, 64 and 16 byte + * copies was found to be faster than doing 128 and 32 byte copies as + * well. + */ + for ( ; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /* + * We split the remaining bytes (which will be less than 256) into + * 64byte (2^6) chunks. + * Using incrementing integers in the case labels of a switch statement + * enourages the compiler to use a jump table. To get incrementing + * integers, we shift the 2 relevant bits to the LSB position to first + * get decrementing integers, and then subtract. + */ + switch (3 - (n >> 6)) { + case 0x00: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x01: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x02: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + default: + break; + } + + /* + * We split the remaining bytes (which will be less than 64) into + * 16byte (2^4) chunks, using the same switch structure as above. + */ + switch (3 - (n >> 4)) { + case 0x00: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x01: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x02: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + default: + break; + } + + /* Copy any remaining bytes, without going beyond end of buffers */ + if (n != 0) + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; +} + +#else + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#endif /* RTE_ARCH_ARM_NEON_MEMCPY */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h new file mode 100644 index 00000000..917cdc1b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h @@ -0,0 +1,93 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_ARM64_H_ +#define _RTE_MEMCPY_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_ARM_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h new file mode 100644 index 00000000..aa37de57 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM_H_ +#define _RTE_PREFETCH_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_PREFETCH_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h new file mode 100644 index 00000000..5aeed22d --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM32_H_ +#define _RTE_PREFETCH_ARM32_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h new file mode 100644 index 00000000..3ed46a46 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h @@ -0,0 +1,66 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM_64_H_ +#define _RTE_PREFETCH_ARM_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("PRFM PLDL2KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_ARM_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h new file mode 100644 index 00000000..664bec88 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h @@ -0,0 +1,40 @@ +/* copied from ppc_64 */ + +#ifndef _RTE_RWLOCK_ARM_H_ +#define _RTE_RWLOCK_ARM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h new file mode 100644 index 00000000..396a42e8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_ARM_H_ +#define _RTE_SPINLOCK_ARM_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h new file mode 100644 index 00000000..a33c0544 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Cavium Networks. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium Networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_ARM_H_ +#define _RTE_VECT_ARM_H_ + +#include "arm_neon.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int32x4_t xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} __attribute__((aligned(16))) rte_xmm_t; + +#ifdef RTE_ARCH_ARM +/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ +static __inline uint8x16_t +vqtbl1q_u8(uint8x16_t a, uint8x16_t b) +{ + uint8_t i, pos; + rte_xmm_t rte_a, rte_b, rte_ret; + + vst1q_u8(rte_a.u8, a); + vst1q_u8(rte_b.u8, b); + + for (i = 0; i < 16; i++) { + pos = rte_b.u8[i]; + if (pos < 16) + rte_ret.u8[i] = rte_a.u8[pos]; + else + rte_ret.u8[i] = 0; + } + + return vld1q_u8(rte_ret.u8); +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h new file mode 100644 index 00000000..feae4868 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -0,0 +1,432 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * Inspired from FreeBSD src/sys/powerpc/include/atomic.h + * Copyright (c) 2008 Marcel Moolenaar + * Copyright (c) 2001 Benno Rice + * Copyright (c) 2001 David E. O'Brien + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_PPC_64_H_ +#define _RTE_ATOMIC_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + */ +#define rte_mb() {asm volatile("sync" : : : "memory"); } + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + */ +#define rte_wmb() {asm volatile("sync" : : : "memory"); } + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + */ +#define rte_rmb() {asm volatile("sync" : : : "memory"); } + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +/*------------------------- 16 bit atomic operations -------------------------*/ +/* To be compatible with Power7, use GCC built-in functions for 16 bit + * operations */ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE, + __ATOMIC_ACQUIRE) ? 1 : 0; +} + +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); +} + +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); +} + +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0; +} + +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + unsigned int ret = 0; + + asm volatile( + "\tlwsync\n" + "1:\tlwarx %[ret], 0, %[dst]\n" + "cmplw %[exp], %[ret]\n" + "bne 2f\n" + "stwcx. %[src], 0, %[dst]\n" + "bne- 1b\n" + "li %[ret], 1\n" + "b 3f\n" + "2:\n" + "stwcx. %[ret], 0, %[dst]\n" + "li %[ret], 0\n" + "3:\n" + "isync\n" + : [ret] "=&r" (ret), "=m" (*dst) + : [dst] "r" (dst), + [exp] "r" (exp), + [src] "r" (src), + "m" (*dst) + : "cc", "memory"); + + return ret; +} + +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + int t; + + asm volatile( + "1: lwarx %[t],0,%[cnt]\n" + "addic %[t],%[t],1\n" + "stwcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + int t; + + asm volatile( + "1: lwarx %[t],0,%[cnt]\n" + "addic %[t],%[t],-1\n" + "stwcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + int ret; + + asm volatile( + "\n\tlwsync\n" + "1: lwarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],1\n" + "stwcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + int ret; + + asm volatile( + "\n\tlwsync\n" + "1: lwarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],-1\n" + "stwcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} +/*------------------------- 64 bit atomic operations -------------------------*/ + +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + unsigned int ret = 0; + + asm volatile ( + "\tlwsync\n" + "1: ldarx %[ret], 0, %[dst]\n" + "cmpld %[exp], %[ret]\n" + "bne 2f\n" + "stdcx. %[src], 0, %[dst]\n" + "bne- 1b\n" + "li %[ret], 1\n" + "b 3f\n" + "2:\n" + "stdcx. %[ret], 0, %[dst]\n" + "li %[ret], 0\n" + "3:\n" + "isync\n" + : [ret] "=&r" (ret), "=m" (*dst) + : [dst] "r" (dst), + [exp] "r" (exp), + [src] "r" (src), + "m" (*dst) + : "cc", "memory"); + return ret; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + v->cnt = 0; +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + long ret; + + asm volatile("ld%U1%X1 %[ret],%[cnt]" + : [ret] "=r"(ret) + : [cnt] "m"(v->cnt)); + + return ret; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + asm volatile("std%U0%X0 %[new_value],%[cnt]" + : [cnt] "=m"(v->cnt) + : [new_value] "r"(new_value)); +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "add %[t],%[inc],%[t]\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt) + : "cc", "memory"); +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "subf %[t],%[dec],%[t]\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt) + : "cc", "memory"); +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "addic %[t],%[t],1\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "addic %[t],%[t],-1\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "add %[ret],%[inc],%[ret]\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [inc] "r" (inc), [cnt] "r" (&v->cnt) + : "cc", "memory"); + + return ret; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "subf %[ret],%[dec],%[ret]\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [dec] "r" (dec), [cnt] "r" (&v->cnt) + : "cc", "memory"); + + return ret; +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],1\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],-1\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + v->cnt = 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h new file mode 100644 index 00000000..3c1734ed --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h @@ -0,0 +1,149 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Inspired from FreeBSD src/sys/powerpc/include/endian.h + * Copyright (c) 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. +*/ + +#ifndef _RTE_BYTEORDER_PPC_64_H_ +#define _RTE_BYTEORDER_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +/* + * An architecture-optimized byte swap for a 16-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap16(). + */ +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + return (_x >> 8) | ((_x << 8) & 0xff00); +} + +/* + * An architecture-optimized byte swap for a 32-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap32(). + */ +static inline uint32_t rte_arch_bswap32(uint32_t _x) +{ + return (_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) | + ((_x << 24) & 0xff000000); +} + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* 64-bit mode */ +static inline uint64_t rte_arch_bswap64(uint64_t _x) +{ + return (_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) | + ((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) | + ((_x << 24) & (0xffULL << 40)) | + ((_x << 40) & (0xffULL << 48)) | ((_x << 56)); +} + +#ifndef RTE_FORCE_INTRINSICS +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) + +#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap32(x) : \ + rte_arch_bswap32(x))) + +#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap64(x) : \ + rte_arch_bswap64(x))) +#else +/* + * __builtin_bswap16 is only available gcc 4.8 and upwards + */ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif +#endif + +/* Power 8 have both little endian and big endian mode + * Power 7 only support big endian + */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h new file mode 100644 index 00000000..7cc2b3c5 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h @@ -0,0 +1,88 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CPUFLAGS_PPC_64_H_ +#define _RTE_CPUFLAGS_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_PPC_LE = 0, + RTE_CPUFLAG_TRUE_LE, + RTE_CPUFLAG_PSERIES_PERFMON_COMPAT, + RTE_CPUFLAG_VSX, + RTE_CPUFLAG_ARCH_2_06, + RTE_CPUFLAG_POWER6_EXT, + RTE_CPUFLAG_DFP, + RTE_CPUFLAG_PA6T, + RTE_CPUFLAG_ARCH_2_05, + RTE_CPUFLAG_ICACHE_SNOOP, + RTE_CPUFLAG_SMT, + RTE_CPUFLAG_BOOKE, + RTE_CPUFLAG_CELLBE, + RTE_CPUFLAG_POWER5_PLUS, + RTE_CPUFLAG_POWER5, + RTE_CPUFLAG_POWER4, + RTE_CPUFLAG_NOTB, + RTE_CPUFLAG_EFP_DOUBLE, + RTE_CPUFLAG_EFP_SINGLE, + RTE_CPUFLAG_SPE, + RTE_CPUFLAG_UNIFIED_CACHE, + RTE_CPUFLAG_4xxMAC, + RTE_CPUFLAG_MMU, + RTE_CPUFLAG_FPU, + RTE_CPUFLAG_ALTIVEC, + RTE_CPUFLAG_PPC601, + RTE_CPUFLAG_PPC64, + RTE_CPUFLAG_PPC32, + RTE_CPUFLAG_TAR, + RTE_CPUFLAG_LSEL, + RTE_CPUFLAG_EBB, + RTE_CPUFLAG_DSCR, + RTE_CPUFLAG_HTM, + RTE_CPUFLAG_ARCH_2_07, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h new file mode 100644 index 00000000..64beddf9 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h @@ -0,0 +1,94 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_PPC_64_H_ +#define _RTE_CYCLES_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +#include + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + union { + uint64_t tsc_64; + struct { +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + uint32_t hi_32; + uint32_t lo_32; +#else + uint32_t lo_32; + uint32_t hi_32; +#endif + }; + } tsc; + uint32_t tmp; + + asm volatile( + "0:\n" + "mftbu %[hi32]\n" + "mftb %[lo32]\n" + "mftbu %[tmp]\n" + "cmpw %[tmp],%[hi32]\n" + "bne 0b\n" + : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32), + [tmp] "=r"(tmp) + ); + return tsc.tsc_64; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h new file mode 100644 index 00000000..acf7aac2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h @@ -0,0 +1,225 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_PPC_64_H_ +#define _RTE_MEMCPY_PPC_64_H_ + +#include +#include +/*To include altivec.h, GCC version must >= 4.8 */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); + vec_vsx_st(vec_vsx_ld(48, src), 48, dst); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); + vec_vsx_st(vec_vsx_ld(48, src), 48, dst); + vec_vsx_st(vec_vsx_ld(64, src), 64, dst); + vec_vsx_st(vec_vsx_ld(80, src), 80, dst); + vec_vsx_st(vec_vsx_ld(96, src), 96, dst); + vec_vsx_st(vec_vsx_ld(112, src), 112, dst); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov128(dst, src); + rte_mov128(dst + 128, src + 128); +} + +#define rte_memcpy(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcpy((dst), (src), (n)) : \ + rte_memcpy_func((dst), (src), (n)); }) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* We can't copy < 16 bytes using XMM registers so do it manually. */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + dst = (uint8_t *)dst + 1; + src = (const uint8_t *)src + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + dst = (uint16_t *)dst + 1; + src = (const uint16_t *)src + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + return ret; + } + + /* Special fast cases for <= 128 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + + if (n <= 128) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /* + * For large copies > 128 bytes. This combination of 256, 64 and 16 byte + * copies was found to be faster than doing 128 and 32 byte copies as + * well. + */ + for ( ; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /* + * We split the remaining bytes (which will be less than 256) into + * 64byte (2^6) chunks. + * Using incrementing integers in the case labels of a switch statement + * enourages the compiler to use a jump table. To get incrementing + * integers, we shift the 2 relevant bits to the LSB position to first + * get decrementing integers, and then subtract. + */ + switch (3 - (n >> 6)) { + case 0x00: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x01: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x02: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + default: + ; + } + + /* + * We split the remaining bytes (which will be less than 64) into + * 16byte (2^4) chunks, using the same switch structure as above. + */ + switch (3 - (n >> 4)) { + case 0x00: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x01: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x02: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + default: + ; + } + + /* Copy any remaining bytes, without going beyond end of buffers */ + if (n != 0) + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h new file mode 100644 index 00000000..9a1995ea --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_PREFETCH_PPC_64_H_ +#define _RTE_PREFETCH_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h new file mode 100644 index 00000000..de8af19e --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h @@ -0,0 +1,38 @@ +#ifndef _RTE_RWLOCK_PPC_64_H_ +#define _RTE_RWLOCK_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h new file mode 100644 index 00000000..af139c9d --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h @@ -0,0 +1,114 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_SPINLOCK_PPC_64_H_ +#define _RTE_SPINLOCK_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +/* Fixme: Use intrinsics to implement the spinlock on Power architecture */ + +#ifndef RTE_FORCE_INTRINSICS + +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + while (__sync_lock_test_and_set(&sl->locked, 1)) + while (sl->locked) + rte_pause(); +} + +static inline void +rte_spinlock_unlock(rte_spinlock_t *sl) +{ + __sync_lock_release(&sl->locked); +} + +static inline int +rte_spinlock_trylock(rte_spinlock_t *sl) +{ + return __sync_lock_test_and_set(&sl->locked, 1) == 0; +} + +#endif + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h new file mode 100644 index 00000000..28825ff6 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_ATOMIC_TILE_H_ +#define _RTE_ATOMIC_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void) +{ + __sync_synchronize(); +} + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void) +{ + __sync_synchronize(); +} + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void) +{ + __sync_synchronize(); +} + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h new file mode 100644 index 00000000..7239e437 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h @@ -0,0 +1,91 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_BYTEORDER_TILE_H_ +#define _RTE_BYTEORDER_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) +#define rte_bswap16(x) rte_constant_bswap16(x) +#endif + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h new file mode 100644 index 00000000..1849b520 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h @@ -0,0 +1,53 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CPUFLAGS_TILE_H_ +#define _RTE_CPUFLAGS_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/tile/rte_cycles.h new file mode 100644 index 00000000..0b2200a3 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_cycles.h @@ -0,0 +1,70 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_TILE_H_ +#define _RTE_CYCLES_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + return get_cycle_count(); +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h new file mode 100644 index 00000000..9b5b37ef --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h @@ -0,0 +1,93 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_TILE_H_ +#define _RTE_MEMCPY_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h new file mode 100644 index 00000000..7a1bb93e --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_PREFETCH_TILE_H_ +#define _RTE_PREFETCH_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 3); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 2); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 1); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h new file mode 100644 index 00000000..8f67a190 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h @@ -0,0 +1,70 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_RWLOCK_TILE_H_ +#define _RTE_RWLOCK_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h new file mode 100644 index 00000000..e91f99ee --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_SPINLOCK_TILE_H_ +#define _RTE_SPINLOCK_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h new file mode 100644 index 00000000..b20056b8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_X86_H_ +#define _RTE_ATOMIC_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_atomic.h" + +#if RTE_MAX_LCORE == 1 +#define MPLOCKED /**< No need to insert MP lock prefix. */ +#else +#define MPLOCKED "lock ; " /**< Insert MP lock prefix. */ +#endif + +#define rte_mb() _mm_mfence() + +#define rte_wmb() _mm_sfence() + +#define rte_rmb() _mm_lfence() + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +/*------------------------- 16 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgw %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + return res; +} + +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "incw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "decw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgl %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + return res; +} + +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "incl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "decl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} +#endif + +#ifdef RTE_ARCH_I686 +#include "rte_atomic_32.h" +#else +#include "rte_atomic_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h new file mode 100644 index 00000000..400d8a96 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/i386/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_I686_H_ +#define _RTE_ATOMIC_I686_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + uint8_t res; + union { + struct { + uint32_t l32; + uint32_t h32; + }; + uint64_t u64; + } _exp, _src; + + _exp.u64 = exp; + _src.u64 = src; + +#ifndef __PIC__ + asm volatile ( + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "b" (_src.l32), /* ebx */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* no-clobber list */ +#else + asm volatile ( + "mov %%ebx, %%edi\n" + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + "xchgl %%ebx, %%edi;\n" + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "D" (_src.l32), /* ebx */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* no-clobber list */ +#endif + + return res; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp + inc); + } +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp - dec); + } +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp + inc); + } + + return tmp + inc; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp - dec); + } + + return tmp - dec; +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_I686_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h new file mode 100644 index 00000000..4de66000 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -0,0 +1,191 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/amd64/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_X86_64_H_ +#define _RTE_ATOMIC_X86_64_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + uint8_t res; + + + asm volatile( + MPLOCKED + "cmpxchgq %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + + return res; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + v->cnt = 0; +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + return v->cnt; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + v->cnt = new_value; +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + asm volatile( + MPLOCKED + "addq %[inc], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [inc] "ir" (inc), /* input */ + "m" (v->cnt) + ); +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + asm volatile( + MPLOCKED + "subq %[dec], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [dec] "ir" (dec), /* input */ + "m" (v->cnt) + ); +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "incq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "decq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + int64_t prev = inc; + + asm volatile( + MPLOCKED + "xaddq %[prev], %[cnt]" + : [prev] "+r" (prev), /* output */ + [cnt] "=m" (v->cnt) + : "m" (v->cnt) /* input */ + ); + return prev + inc; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return rte_atomic64_add_return(v, -dec); +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + + return ret != 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "decq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + v->cnt = 0; +} +#endif + +#endif /* _RTE_ATOMIC_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h new file mode 100644 index 00000000..ffdb6ef5 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -0,0 +1,125 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_H_ +#define _RTE_BYTEORDER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +#ifndef RTE_BYTE_ORDER +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An architecture-optimized byte swap for a 16-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap16(). + */ +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + register uint16_t x = _x; + asm volatile ("xchgb %b[x1],%h[x2]" + : [x1] "=Q" (x) + : [x2] "0" (x) + ); + return x; +} + +/* + * An architecture-optimized byte swap for a 32-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap32(). + */ +static inline uint32_t rte_arch_bswap32(uint32_t _x) +{ + register uint32_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} + +#ifndef RTE_FORCE_INTRINSICS +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) + +#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap32(x) : \ + rte_arch_bswap32(x))) + +#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap64(x) : \ + rte_arch_bswap64(x))) +#else +/* + * __builtin_bswap16 is only available gcc 4.8 and upwards + */ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif +#endif + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#ifdef RTE_ARCH_I686 +#include "rte_byteorder_32.h" +#else +#include "rte_byteorder_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h new file mode 100644 index 00000000..51c306f8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_I686_H_ +#define _RTE_BYTEORDER_I686_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* Compat./Leg. mode */ +static inline uint64_t rte_arch_bswap64(uint64_t x) +{ + uint64_t ret = 0; + ret |= ((uint64_t)rte_arch_bswap32(x & 0xffffffffUL) << 32); + ret |= ((uint64_t)rte_arch_bswap32((x >> 32) & 0xffffffffUL)); + return ret; +} + +#endif /* _RTE_BYTEORDER_I686_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h new file mode 100644 index 00000000..dda572bd --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_64_H_ +#define _RTE_BYTEORDER_X86_64_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* 64-bit mode */ +static inline uint64_t rte_arch_bswap64(uint64_t _x) +{ + register uint64_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} + +#endif /* _RTE_BYTEORDER_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h new file mode 100644 index 00000000..26204fab --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_X86_64_H_ +#define _RTE_CPUFLAGS_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum rte_cpu_flag_t { + /* (EAX 01h) ECX features*/ + RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */ + RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */ + RTE_CPUFLAG_DTES64, /**< DTES64 */ + RTE_CPUFLAG_MONITOR, /**< MONITOR */ + RTE_CPUFLAG_DS_CPL, /**< DS_CPL */ + RTE_CPUFLAG_VMX, /**< VMX */ + RTE_CPUFLAG_SMX, /**< SMX */ + RTE_CPUFLAG_EIST, /**< EIST */ + RTE_CPUFLAG_TM2, /**< TM2 */ + RTE_CPUFLAG_SSSE3, /**< SSSE3 */ + RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */ + RTE_CPUFLAG_FMA, /**< FMA */ + RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */ + RTE_CPUFLAG_XTPR, /**< XTPR */ + RTE_CPUFLAG_PDCM, /**< PDCM */ + RTE_CPUFLAG_PCID, /**< PCID */ + RTE_CPUFLAG_DCA, /**< DCA */ + RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */ + RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */ + RTE_CPUFLAG_X2APIC, /**< X2APIC */ + RTE_CPUFLAG_MOVBE, /**< MOVBE */ + RTE_CPUFLAG_POPCNT, /**< POPCNT */ + RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */ + RTE_CPUFLAG_AES, /**< AES */ + RTE_CPUFLAG_XSAVE, /**< XSAVE */ + RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */ + RTE_CPUFLAG_AVX, /**< AVX */ + RTE_CPUFLAG_F16C, /**< F16C */ + RTE_CPUFLAG_RDRAND, /**< RDRAND */ + + /* (EAX 01h) EDX features */ + RTE_CPUFLAG_FPU, /**< FPU */ + RTE_CPUFLAG_VME, /**< VME */ + RTE_CPUFLAG_DE, /**< DE */ + RTE_CPUFLAG_PSE, /**< PSE */ + RTE_CPUFLAG_TSC, /**< TSC */ + RTE_CPUFLAG_MSR, /**< MSR */ + RTE_CPUFLAG_PAE, /**< PAE */ + RTE_CPUFLAG_MCE, /**< MCE */ + RTE_CPUFLAG_CX8, /**< CX8 */ + RTE_CPUFLAG_APIC, /**< APIC */ + RTE_CPUFLAG_SEP, /**< SEP */ + RTE_CPUFLAG_MTRR, /**< MTRR */ + RTE_CPUFLAG_PGE, /**< PGE */ + RTE_CPUFLAG_MCA, /**< MCA */ + RTE_CPUFLAG_CMOV, /**< CMOV */ + RTE_CPUFLAG_PAT, /**< PAT */ + RTE_CPUFLAG_PSE36, /**< PSE36 */ + RTE_CPUFLAG_PSN, /**< PSN */ + RTE_CPUFLAG_CLFSH, /**< CLFSH */ + RTE_CPUFLAG_DS, /**< DS */ + RTE_CPUFLAG_ACPI, /**< ACPI */ + RTE_CPUFLAG_MMX, /**< MMX */ + RTE_CPUFLAG_FXSR, /**< FXSR */ + RTE_CPUFLAG_SSE, /**< SSE */ + RTE_CPUFLAG_SSE2, /**< SSE2 */ + RTE_CPUFLAG_SS, /**< SS */ + RTE_CPUFLAG_HTT, /**< HTT */ + RTE_CPUFLAG_TM, /**< TM */ + RTE_CPUFLAG_PBE, /**< PBE */ + + /* (EAX 06h) EAX features */ + RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */ + RTE_CPUFLAG_TRBOBST, /**< TRBOBST */ + RTE_CPUFLAG_ARAT, /**< ARAT */ + RTE_CPUFLAG_PLN, /**< PLN */ + RTE_CPUFLAG_ECMD, /**< ECMD */ + RTE_CPUFLAG_PTM, /**< PTM */ + + /* (EAX 06h) ECX features */ + RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */ + RTE_CPUFLAG_ACNT2, /**< ACNT2 */ + RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */ + + /* (EAX 07h, ECX 0h) EBX features */ + RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */ + RTE_CPUFLAG_BMI1, /**< BMI1 */ + RTE_CPUFLAG_HLE, /**< Hardware Lock elision */ + RTE_CPUFLAG_AVX2, /**< AVX2 */ + RTE_CPUFLAG_SMEP, /**< SMEP */ + RTE_CPUFLAG_BMI2, /**< BMI2 */ + RTE_CPUFLAG_ERMS, /**< ERMS */ + RTE_CPUFLAG_INVPCID, /**< INVPCID */ + RTE_CPUFLAG_RTM, /**< Transactional memory */ + RTE_CPUFLAG_AVX512F, /**< AVX512F */ + + /* (EAX 80000001h) ECX features */ + RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */ + RTE_CPUFLAG_LZCNT, /**< LZCNT */ + + /* (EAX 80000001h) EDX features */ + RTE_CPUFLAG_SYSCALL, /**< SYSCALL */ + RTE_CPUFLAG_XD, /**< XD */ + RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */ + RTE_CPUFLAG_RDTSCP, /**< RDTSCP */ + RTE_CPUFLAG_EM64T, /**< EM64T */ + + /* (EAX 80000007h) EDX features */ + RTE_CPUFLAG_INVTSC, /**< INVTSC */ + + /* The last item */ + RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h new file mode 100644 index 00000000..6e3c7d89 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -0,0 +1,121 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_X86_64_H_ +#define _RTE_CYCLES_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT +/* Global switch to use VMWARE mapping of TSC instead of RDTSC */ +extern int rte_cycles_vmware_tsc_map; +#include +#endif + +static inline uint64_t +rte_rdtsc(void) +{ + union { + uint64_t tsc_64; + struct { + uint32_t lo_32; + uint32_t hi_32; + }; + } tsc; + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + if (unlikely(rte_cycles_vmware_tsc_map)) { + /* ecx = 0x10000 corresponds to the physical TSC for VMware */ + asm volatile("rdpmc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32) : + "c"(0x10000)); + return tsc.tsc_64; + } +#endif + + asm volatile("rdtsc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32)); + return tsc.tsc_64; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h new file mode 100644 index 00000000..f463ab30 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -0,0 +1,884 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_X86_64_H_ +#define _RTE_MEMCPY_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy(). + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Copy bytes from one location to another. The locations must not overlap. + * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. + */ +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#ifdef RTE_MACHINE_CPUFLAG_AVX512F + +/** + * AVX512 implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + __m512i zmm0; + + zmm0 = _mm512_loadu_si512((const void *)src); + _mm512_storeu_si512((void *)dst, zmm0); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov64(dst + 0 * 64, src + 0 * 64); + rte_mov64(dst + 1 * 64, src + 1 * 64); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov64(dst + 0 * 64, src + 0 * 64); + rte_mov64(dst + 1 * 64, src + 1 * 64); + rte_mov64(dst + 2 * 64, src + 2 * 64); + rte_mov64(dst + 3 * 64, src + 3 * 64); +} + +/** + * Copy 128-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m512i zmm0, zmm1; + + while (n >= 128) { + zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); + n -= 128; + zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); + src = src + 128; + _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); + _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); + dst = dst + 128; + } +} + +/** + * Copy 512-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; + + while (n >= 512) { + zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); + n -= 512; + zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); + zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64)); + zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64)); + zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64)); + zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64)); + zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64)); + zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64)); + src = src + 512; + _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); + _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); + _mm512_storeu_si512((void *)(dst + 2 * 64), zmm2); + _mm512_storeu_si512((void *)(dst + 3 * 64), zmm3); + _mm512_storeu_si512((void *)(dst + 4 * 64), zmm4); + _mm512_storeu_si512((void *)(dst + 5 * 64), zmm5); + _mm512_storeu_si512((void *)(dst + 6 * 64), zmm6); + _mm512_storeu_si512((void *)(dst + 7 * 64), zmm7); + dst = dst + 512; + } +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) + *(uint64_t *)dstu = *(const uint64_t *)srcu; + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } +COPY_BLOCK_128_BACK63: + if (n > 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + if (n > 0) + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = ((uintptr_t)dst & 0x3F); + if (dstofss > 0) { + dstofss = 64 - dstofss; + n -= dstofss; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + + /** + * Copy 512-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 511; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + + /** + * Copy 128-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + if (n >= 128) { + rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 127; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_128_BACK63; +} + +#elif defined RTE_MACHINE_CPUFLAG_AVX2 + +/** + * AVX2 implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); + rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32); + rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32); + rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32); + rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32); +} + +/** + * Copy 64-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1; + + while (n >= 64) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 64; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + src = (const uint8_t *)src + 64; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + dst = (uint8_t *)dst + 64; + } +} + +/** + * Copy 256-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + + while (n >= 256) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 256; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); + ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); + ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32)); + ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32)); + ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32)); + ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32)); + src = (const uint8_t *)src + 256; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7); + dst = (uint8_t *)dst + 256; + } +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK31: + if (n > 32) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n > 0) { + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = (uintptr_t)dst & 0x1F; + if (dstofss > 0) { + dstofss = 32 - dstofss; + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + + /** + * Copy 256-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 255; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + + /** + * Copy 64-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + if (n >= 64) { + rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 63; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK31; +} + +#else /* RTE_MACHINE_CPUFLAG */ + +/** + * SSE & AVX implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); + rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16); + rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16); + rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16); + rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16); + rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16); + rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16); + rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16); + rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16); +} + +/** + * Macro for copying unaligned block from one location to another with constant load offset, + * 47 bytes leftover maximum, + * locations should not overlap. + * Requirements: + * - Store is aligned + * - Load offset is , which must be immediate value within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ +({ \ + int tmp; \ + while (len >= 128 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 128; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \ + xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \ + xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \ + xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \ + xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \ + xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \ + src = (const uint8_t *)src + 128; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ + dst = (uint8_t *)dst + 128; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 127) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + if (len >= 32 + 16 - offset) { \ + while (len >= 32 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 32; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + src = (const uint8_t *)src + 32; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + dst = (uint8_t *)dst + 32; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 31) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + } \ +}) + +/** + * Macro for copying unaligned block from one location to another, + * 47 bytes leftover maximum, + * locations should not overlap. + * Use switch here because the aligning instruction requires immediate value for shift count. + * Requirements: + * - Store is aligned + * - Load offset is , which must be within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ +({ \ + switch (offset) { \ + case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ + case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ + case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \ + case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \ + case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \ + case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \ + case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \ + case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \ + case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \ + case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \ + case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \ + case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \ + case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \ + case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \ + case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \ + default:; \ + } \ +}) + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t srcofs; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 48) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 128) { + goto COPY_BLOCK_128_BACK15; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } +COPY_BLOCK_255_BACK15: + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } +COPY_BLOCK_128_BACK15: + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK15: + if (n >= 32) { + n -= 32; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 32; + dst = (uint8_t *)dst + 32; + } + if (n > 16) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n > 0) { + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes, + * and make sure the first 15 bytes are copied, because + * unaligned copy functions require up to 15 bytes + * backwards access. + */ + dstofss = (uintptr_t)dst & 0x0F; + if (dstofss > 0) { + dstofss = 16 - dstofss + 16; + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + srcofs = ((uintptr_t)src & 0x0F); + + /** + * For aligned copy + */ + if (srcofs == 0) { + /** + * Copy 256-byte blocks + */ + for (; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_255_BACK15; + } + + /** + * For copy with unaligned load + */ + MOVEUNALIGNED_LEFT47(dst, src, n, srcofs); + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK15; +} + +#endif /* RTE_MACHINE_CPUFLAG */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h new file mode 100644 index 00000000..5dac47eb --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_X86_64_H_ +#define _RTE_PREFETCH_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h new file mode 100644 index 00000000..d9356419 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -0,0 +1,73 @@ +#ifndef _RTE_RTM_H_ +#define _RTE_RTM_H_ 1 + +/* + * Copyright (c) 2012,2013 Intel Corporation + * Author: Andi Kleen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that: (1) source code distributions + * retain the above copyright notice and this paragraph in its entirety, (2) + * distributions including binary code include the above copyright notice and + * this paragraph in its entirety in the documentation or other materials + * provided with the distribution + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* Official RTM intrinsics interface matching gcc/icc, but works + on older gcc compatible compilers and binutils. */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_XBEGIN_STARTED (~0u) +#define RTE_XABORT_EXPLICIT (1 << 0) +#define RTE_XABORT_RETRY (1 << 1) +#define RTE_XABORT_CONFLICT (1 << 2) +#define RTE_XABORT_CAPACITY (1 << 3) +#define RTE_XABORT_DEBUG (1 << 4) +#define RTE_XABORT_NESTED (1 << 5) +#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff) + +static __attribute__((__always_inline__)) inline +unsigned int rte_xbegin(void) +{ + unsigned int ret = RTE_XBEGIN_STARTED; + + asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); + return ret; +} + +static __attribute__((__always_inline__)) inline +void rte_xend(void) +{ + asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory"); +} + +static __attribute__((__always_inline__)) inline +void rte_xabort(const unsigned int status) +{ + asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); +} + +static __attribute__((__always_inline__)) inline +int rte_xtest(void) +{ + unsigned char out; + + asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : + "=r" (out) :: "memory"); + return out; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RTM_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_rwlock.h b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h new file mode 100644 index 00000000..afd1c3c2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RWLOCK_X86_64_H_ +#define _RTE_RWLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" +#include "rte_spinlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_read_unlock(rwl); + else + rte_xend(); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_write_unlock(rwl); + else + rte_xend(); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h new file mode 100644 index 00000000..02f95cbb --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_X86_64_H_ +#define _RTE_SPINLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_spinlock.h" +#include "rte_rtm.h" +#include "rte_cpuflags.h" +#include "rte_branch_prediction.h" +#include "rte_common.h" + +#define RTE_RTM_MAX_RETRIES (10) +#define RTE_XABORT_LOCK_BUSY (0xff) + +#ifndef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + int lock_val = 1; + asm volatile ( + "1:\n" + "xchg %[locked], %[lv]\n" + "test %[lv], %[lv]\n" + "jz 3f\n" + "2:\n" + "pause\n" + "cmpl $0, %[locked]\n" + "jnz 2b\n" + "jmp 1b\n" + "3:\n" + : [locked] "=m" (sl->locked), [lv] "=q" (lock_val) + : "[lv]" (lock_val) + : "memory"); +} + +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + int unlock_val = 0; + asm volatile ( + "xchg %[locked], %[ulv]\n" + : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val) + : "[ulv]" (unlock_val) + : "memory"); +} + +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + int lockval = 1; + + asm volatile ( + "xchg %[locked], %[lockval]" + : [locked] "=m" (sl->locked), [lockval] "=q" (lockval) + : "[lockval]" (lockval) + : "memory"); + + return lockval == 0; +} +#endif + +static uint8_t rtm_supported; /* cache the flag to avoid the overhead + of the rte_cpu_get_flag_enabled function */ + +static inline void __attribute__((constructor)) +rte_rtm_init(void) +{ + rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); +} + +static inline int rte_tm_supported(void) +{ + return rtm_supported; +} + +static inline int +rte_try_tm(volatile int *lock) +{ + if (!rtm_supported) + return 0; + + int retries = RTE_RTM_MAX_RETRIES; + + while (likely(retries--)) { + + unsigned int status = rte_xbegin(); + + if (likely(RTE_XBEGIN_STARTED == status)) { + if (unlikely(*lock)) + rte_xabort(RTE_XABORT_LOCK_BUSY); + else + return 1; + } + while (*lock) + rte_pause(); + + if ((status & RTE_XABORT_EXPLICIT) && + (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) + continue; + + if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ + break; + } + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return; + + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return 1; + + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + if (unlikely(sl->locked)) + rte_spinlock_unlock(sl); + else + rte_xend(); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return; + + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + if (unlikely(slr->sl.locked)) + rte_spinlock_recursive_unlock(slr); + else + rte_xend(); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return 1; + + return rte_spinlock_recursive_trylock(slr); +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h new file mode 100644 index 00000000..b698797c --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -0,0 +1,132 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * + * RTE SSE/AVX related header. + */ + +#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) + +#ifdef __SSE__ +#include +#endif + +#ifdef __SSE2__ +#include +#endif + +#ifdef __SSE3__ +#include +#endif + +#if defined(__SSE4_2__) || defined(__SSE4_1__) +#include +#endif + +#if defined(__AVX__) +#include +#endif + +#else + +#include + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef __m128i xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} rte_xmm_t; + +#ifdef __AVX__ + +typedef __m256i ymm_t; + +#define YMM_SIZE (sizeof(ymm_t)) +#define YMM_MASK (YMM_SIZE - 1) + +typedef union rte_ymm { + ymm_t y; + xmm_t x[YMM_SIZE / sizeof(xmm_t)]; + uint8_t u8[YMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; + double pd[YMM_SIZE / sizeof(double)]; +} rte_ymm_t; + +#endif /* __AVX__ */ + +#ifdef RTE_ARCH_I686 +#define _mm_cvtsi128_si64(a) ({ \ + rte_xmm_t m; \ + m.x = (a); \ + (m.u64[0]); \ +}) +#endif + +/* + * Prior to version 12.1 icc doesn't support _mm_set_epi64x. + */ +#if (defined(__ICC) && __ICC < 1210) +#define _mm_set_epi64x(a, b) ({ \ + rte_xmm_t m; \ + m.u64[0] = b; \ + m.u64[1] = a; \ + (m.x); \ +}) +#endif /* (defined(__ICC) && __ICC < 1210) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VECT_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h new file mode 100644 index 00000000..bfb4fe44 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -0,0 +1,945 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_H_ +#define _RTE_ATOMIC_H_ + +/** + * @file + * Atomic Operations + * + * This file defines a generic API for atomic operations. + */ + +#include + +#ifdef __DOXYGEN__ + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void); + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void); + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void); + +/** + * General memory barrier between lcores + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_smp_mb() call are globally visible across the lcores + * before the the LOAD and STORE operations that follows it. + */ +static inline void rte_smp_mb(void); + +/** + * Write memory barrier between lcores + * + * Guarantees that the STORE operations that precede the + * rte_smp_wmb() call are globally visible across the lcores + * before the the STORE operations that follows it. + */ +static inline void rte_smp_wmb(void); + +/** + * Read memory barrier between lcores + * + * Guarantees that the LOAD operations that precede the + * rte_smp_rmb() call are globally visible across the lcores + * before the the LOAD operations that follows it. + */ +static inline void rte_smp_rmb(void); + +#endif /* __DOXYGEN__ */ + +/** + * Compiler barrier. + * + * Guarantees that operation reordering does not occur at compile time + * for operations directly before and after the barrier. + */ +#define rte_compiler_barrier() do { \ + asm volatile ("" : : : "memory"); \ +} while(0) + +/*------------------------- 16 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 16-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int16_t cnt; /**< An internal counter value. */ +} rte_atomic16_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC16_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_init(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 16-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int16_t +rte_atomic16_read(const rte_atomic16_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 16-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic16_set(rte_atomic16_t *v, int16_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 16-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic16_add(rte_atomic16_t *v, int16_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic16_sub(rte_atomic16_t *v, int16_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_inc(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + rte_atomic16_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_dec(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + rte_atomic16_sub(v, 1); +} +#endif + +/** + * Atomically add a 16-bit value to a counter and return the result. + * + * Atomically adds the 16-bits value (inc) to the atomic counter (v) and + * returns the value of v after addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int16_t +rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from a counter and return + * the result. + * + * Atomically subtracts the 16-bit value (inc) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int16_t +rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 16-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + return __sync_add_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically decrement a 16-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise. + */ +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + return __sync_sub_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically test and set a 16-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 16-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic16_clear(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 32-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int32_t cnt; /**< An internal counter value. */ +} rte_atomic32_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC32_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_init(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 32-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int32_t +rte_atomic32_read(const rte_atomic32_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 32-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic32_set(rte_atomic32_t *v, int32_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 32-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic32_add(rte_atomic32_t *v, int32_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic32_sub(rte_atomic32_t *v, int32_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_inc(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + rte_atomic32_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_dec(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + rte_atomic32_sub(v,1); +} +#endif + +/** + * Atomically add a 32-bit value to a counter and return the result. + * + * Atomically adds the 32-bits value (inc) to the atomic counter (v) and + * returns the value of v after addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int32_t +rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from a counter and return + * the result. + * + * Atomically subtracts the 32-bit value (inc) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int32_t +rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 32-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + return __sync_add_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically decrement a 32-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise. + */ +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + return __sync_sub_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically test and set a 32-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 32-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic32_clear(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 64 bit atomic operations -------------------------*/ + +/** + * An atomic compare and set function used by the mutex functions. + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 64-bit words) + * + * @param dst + * The destination into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int64_t cnt; /**< Internal counter value. */ +} rte_atomic64_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC64_INIT(val) { (val) } + +/** + * Initialize the atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_init(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ +#ifdef __LP64__ + v->cnt = 0; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +#endif +} +#endif + +/** + * Atomically read a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ +#ifdef __LP64__ + return v->cnt; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +#endif +} +#endif + +/** + * Atomically set a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value of the counter. + */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ +#ifdef __LP64__ + v->cnt = new_value; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +#endif +} +#endif + +/** + * Atomically add a 64-bit value to a counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} +#endif + +/** + * Atomically subtract a 64-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} +#endif + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} +#endif + +/** + * Add a 64-bit value to an atomic counter and return the result. + * + * Atomically adds the 64-bit value (inc) to the atomic counter (v) and + * returns the value of v after the addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} +#endif + +/** + * Subtract a 64-bit value from an atomic counter and return the result. + * + * Atomically subtracts the 64-bit value (dec) from the atomic counter (v) + * and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns + * true if the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the addition is 0; false otherwise. + */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} +#endif + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after subtraction is 0; false otherwise. + */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} +#endif + +/** + * Atomically test and set a 64-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h new file mode 100644 index 00000000..c46fdcf2 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -0,0 +1,217 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_H_ +#define _RTE_BYTEORDER_H_ + +/** + * @file + * + * Byte Swap Operations + * + * This file defines a generic API for byte swap operations. Part of + * the implementation is architecture-specific. + */ + +#include +#ifdef RTE_EXEC_ENV_BSDAPP +#include +#else +#include +#endif + +/* + * Compile-time endianness detection + */ +#define RTE_BIG_ENDIAN 1 +#define RTE_LITTLE_ENDIAN 2 +#if defined __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER__ */ +#elif defined __BYTE_ORDER +#if __BYTE_ORDER == __BIG_ENDIAN +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER == __LITTLE_ENDIAN +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER */ +#elif defined __BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif defined __LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An internal function to swap bytes in a 16-bit value. + * + * It is used by rte_bswap16() when the value is constant. Do not use + * this function directly; rte_bswap16() is preferred. + */ +static inline uint16_t +rte_constant_bswap16(uint16_t x) +{ + return (uint16_t)(((x & 0x00ffU) << 8) | + ((x & 0xff00U) >> 8)); +} + +/* + * An internal function to swap bytes in a 32-bit value. + * + * It is used by rte_bswap32() when the value is constant. Do not use + * this function directly; rte_bswap32() is preferred. + */ +static inline uint32_t +rte_constant_bswap32(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} + +/* + * An internal function to swap bytes of a 64-bit value. + * + * It is used by rte_bswap64() when the value is constant. Do not use + * this function directly; rte_bswap64() is preferred. + */ +static inline uint64_t +rte_constant_bswap64(uint64_t x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +} + + +#ifdef __DOXYGEN__ + +/** + * Swap bytes in a 16-bit value. + */ +static uint16_t rte_bswap16(uint16_t _x); + +/** + * Swap bytes in a 32-bit value. + */ +static uint32_t rte_bswap32(uint32_t x); + +/** + * Swap bytes in a 64-bit value. + */ +static uint64_t rte_bswap64(uint64_t x); + +/** + * Convert a 16-bit value from CPU order to little endian. + */ +static uint16_t rte_cpu_to_le_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to little endian. + */ +static uint32_t rte_cpu_to_le_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to little endian. + */ +static uint64_t rte_cpu_to_le_64(uint64_t x); + + +/** + * Convert a 16-bit value from CPU order to big endian. + */ +static uint16_t rte_cpu_to_be_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to big endian. + */ +static uint32_t rte_cpu_to_be_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to big endian. + */ +static uint64_t rte_cpu_to_be_64(uint64_t x); + + +/** + * Convert a 16-bit value from little endian to CPU order. + */ +static uint16_t rte_le_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from little endian to CPU order. + */ +static uint32_t rte_le_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from little endian to CPU order. + */ +static uint64_t rte_le_to_cpu_64(uint64_t x); + + +/** + * Convert a 16-bit value from big endian to CPU order. + */ +static uint16_t rte_be_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from big endian to CPU order. + */ +static uint32_t rte_be_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from big endian to CPU order. + */ +static uint64_t rte_be_to_cpu_64(uint64_t x); + +#endif /* __DOXYGEN__ */ + +#ifdef RTE_FORCE_INTRINSICS +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +#define rte_bswap16(x) __builtin_bswap16(x) +#endif + +#define rte_bswap32(x) __builtin_bswap32(x) + +#define rte_bswap64(x) __builtin_bswap64(x) + +#endif + +#endif /* _RTE_BYTEORDER_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h new file mode 100644 index 00000000..c1da357c --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_H_ +#define _RTE_CPUFLAGS_H_ + +/** + * @file + * Architecture specific API to determine available CPU features at runtime. + */ + +#include + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t; + +/** + * Get name of CPU flag + * + * @param feature + * CPU flag ID + * @return + * flag name + * NULL if flag ID is invalid + */ +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); + +/** + * Function for checking a CPU flag availability + * + * @param feature + * CPU flag to query CPU for + * @return + * 1 if flag is available + * 0 if flag is not available + * -ENOENT if flag is invalid + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); + +/** + * This function checks that the currently used CPU supports the CPU features + * that were specified at compile time. It is called automatically within the + * EAL, so does not need to be used by applications. + */ +void +rte_cpu_check_supported(void); + +#endif /* _RTE_CPUFLAGS_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h new file mode 100644 index 00000000..8cc21f20 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -0,0 +1,205 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_H_ +#define _RTE_CYCLES_H_ + +/** + * @file + * + * Simple Time Reference Functions (Cycles and HPET). + */ + +#include +#include +#include + +#define MS_PER_S 1000 +#define US_PER_S 1000000 +#define NS_PER_S 1000000000 + +enum timer_source { + EAL_TIMER_TSC = 0, + EAL_TIMER_HPET +}; +extern enum timer_source eal_timer_source; + +/** + * Get the measured frequency of the RDTSC counter + * + * @return + * The TSC frequency for this lcore + */ +uint64_t +rte_get_tsc_hz(void); + +/** + * Return the number of TSC cycles since boot + * + * @return + * the number of cycles + */ +static inline uint64_t +rte_get_tsc_cycles(void); + +#ifdef RTE_LIBEAL_USE_HPET +/** + * Return the number of HPET cycles since boot + * + * This counter is global for all execution units. The number of + * cycles in one second can be retrieved using rte_get_hpet_hz(). + * + * @return + * the number of cycles + */ +uint64_t +rte_get_hpet_cycles(void); + +/** + * Get the number of HPET cycles in one second. + * + * @return + * The number of cycles in one second. + */ +uint64_t +rte_get_hpet_hz(void); + +/** + * Initialise the HPET for use. This must be called before the rte_get_hpet_hz + * and rte_get_hpet_cycles APIs are called. If this function does not succeed, + * then the HPET functions are unavailable and should not be called. + * + * @param make_default + * If set, the hpet timer becomes the default timer whose values are + * returned by the rte_get_timer_hz/cycles API calls + * + * @return + * 0 on success, + * -1 on error, and the make_default parameter is ignored. + */ +int rte_eal_hpet_init(int make_default); + +#endif + +/** + * Get the number of cycles since boot from the default timer. + * + * @return + * The number of cycles + */ +static inline uint64_t +rte_get_timer_cycles(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_cycles(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_cycles(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Get the number of cycles in one second for the default timer. + * + * @return + * The number of cycles in one second. + */ +static inline uint64_t +rte_get_timer_hz(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_hz(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_hz(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Wait at least us microseconds. + * + * @param us + * The number of microseconds to wait. + */ +void +rte_delay_us(unsigned us); + +/** + * Wait at least ms milliseconds. + * + * @param ms + * The number of milliseconds to wait. + */ +static inline void +rte_delay_ms(unsigned ms) +{ + rte_delay_us(ms * 1000); +} + +#endif /* _RTE_CYCLES_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h new file mode 100644 index 00000000..03e84773 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_H_ +#define _RTE_MEMCPY_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcpy(). + */ + +/** + * Copy 16 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src); + +/** + * Copy 32 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src); + +/** + * Copy 48 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src); + +/** + * Copy 64 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src); + +/** + * Copy 128 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src); + +/** + * Copy 256 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src); + +#ifdef __DOXYGEN__ + +/** + * Copy bytes from one location to another. The locations must not overlap. + * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. + */ +static void * +rte_memcpy(void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcpy() function used by rte_memcpy macro + */ +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + + +#endif /* _RTE_MEMCPY_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h new file mode 100644 index 00000000..07e409ec --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_prefetch.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_H_ +#define _RTE_PREFETCH_H_ + +/** + * @file + * + * Prefetch operations. + * + * This file defines an API for prefetch macros / inline-functions, + * which are architecture-dependent. Prefetching occurs when a + * processor requests an instruction or data from memory to cache + * before it is actually needed, potentially speeding up the execution of the + * program. + */ + +/** + * Prefetch a cache line into all cache levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch0(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th cache level. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch1(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th and 1th cache + * levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch2(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels (non-temporal/transient version) + * + * The non-temporal prefetch is intended as a prefetch hint that processor will + * use the prefetched data only once or short period, unlike the + * rte_prefetch0() function which imply that prefetched data to use repeatedly. + * + * @param p + * Address to prefetch + */ +static inline void rte_prefetch_non_temporal(const volatile void *p); + +#endif /* _RTE_PREFETCH_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h new file mode 100644 index 00000000..7a0fdc55 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -0,0 +1,208 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RWLOCK_H_ +#define _RTE_RWLOCK_H_ + +/** + * @file + * + * RTE Read-Write Locks + * + * This file defines an API for read-write locks. The lock is used to + * protect data that allows multiple readers in parallel, but only + * one writer. All readers are blocked until the writer is finished + * writing. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * The rte_rwlock_t type. + * + * cnt is -1 when write lock is held, and > 0 when read locks are held. + */ +typedef struct { + volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */ +} rte_rwlock_t; + +/** + * A static rwlock initializer. + */ +#define RTE_RWLOCK_INITIALIZER { 0 } + +/** + * Initialize the rwlock to an unlocked state. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_init(rte_rwlock_t *rwl) +{ + rwl->cnt = 0; +} + +/** + * Take a read lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* write lock is held */ + if (x < 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + x, x + 1); + } +} + +/** + * Release a read lock. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Take a write lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* a lock is held */ + if (x != 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + 0, -1); + } +} + +/** + * Release a write lock. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a read lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl); + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a write lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h new file mode 100644 index 00000000..e51fc56b --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_H_ +#define _RTE_SPINLOCK_H_ + +/** + * @file + * + * RTE Spinlocks + * + * This file defines an API for read-write locks, which are implemented + * in an architecture-specific way. This kind of lock simply waits in + * a loop repeatedly checking until the lock becomes available. + * + * All locks must be initialised before use, and only initialised once. + * + */ + +#include +#ifdef RTE_FORCE_INTRINSICS +#include +#endif + +/** + * The rte_spinlock_t type. + */ +typedef struct { + volatile int locked; /**< lock status 0 = unlocked, 1 = locked */ +} rte_spinlock_t; + +/** + * A static spinlock initializer. + */ +#define RTE_SPINLOCK_INITIALIZER { 0 } + +/** + * Initialize the spinlock to an unlocked state. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_init(rte_spinlock_t *sl) +{ + sl->locked = 0; +} + +/** + * Take the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock(rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + while (__sync_lock_test_and_set(&sl->locked, 1)) + while(sl->locked) + rte_pause(); +} +#endif + +/** + * Release the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + __sync_lock_release(&sl->locked); +} +#endif + +/** + * Try to take the lock. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + return __sync_lock_test_and_set(&sl->locked,1) == 0; +} +#endif + +/** + * Test if the lock is taken. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is currently taken; 0 otherwise. + */ +static inline int rte_spinlock_is_locked (rte_spinlock_t *sl) +{ + return sl->locked; +} + +/** + * Test if hardware transactional memory (lock elision) is supported + * + * @return + * 1 if the hardware transactional memory is supported; 0 otherwise. + */ +static inline int rte_tm_supported(void); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the spinlock. + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl); + +/** + * Commit hardware memory transaction or release the spinlock if + * the spinlock is used as a fall-back + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the lock. + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl); + +/** + * The rte_spinlock_recursive_t type. + */ +typedef struct { + rte_spinlock_t sl; /**< the actual spinlock */ + volatile int user; /**< core id using lock, -1 for unused */ + volatile int count; /**< count of time this lock has been called */ +} rte_spinlock_recursive_t; + +/** + * A static recursive spinlock initializer. + */ +#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0} + +/** + * Initialize the recursive spinlock to an unlocked state. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_init(&slr->sl); + slr->user = -1; + slr->count = 0; +} + +/** + * Take the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + rte_spinlock_lock(&slr->sl); + slr->user = id; + } + slr->count++; +} +/** + * Release the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr) +{ + if (--(slr->count) == 0) { + slr->user = -1; + rte_spinlock_unlock(&slr->sl); + } + +} + +/** + * Try to take the recursive lock. + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + if (rte_spinlock_trylock(&slr->sl) == 0) + return 0; + slr->user = id; + } + slr->count++; + return 1; +} + + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the recursive spinlocks + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Commit hardware memory transaction or release the recursive spinlock + * if the recursive spinlock is used as a fall-back + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the recursive lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock_tm( + rte_spinlock_recursive_t *slr); + +#endif /* _RTE_SPINLOCK_H_ */ diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h new file mode 100644 index 00000000..4012cd67 --- /dev/null +++ b/lib/librte_eal/common/include/rte_alarm.h @@ -0,0 +1,106 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ALARM_H_ +#define _RTE_ALARM_H_ + +/** + * @file + * + * Alarm functions + * + * Simple alarm-clock functionality supplied by eal. + * Does not require hpet support. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Signature of callback back function called when an alarm goes off. + */ +typedef void (*rte_eal_alarm_callback)(void *arg); + +/** + * Function to set a callback to be triggered when us microseconds + * have expired. Accuracy of timing to the microsecond is not guaranteed. The + * alarm function will not be called *before* the requested time, but may + * be called a short period of time afterwards. + * The alarm handler will be called only once. There is no need to call + * "rte_eal_alarm_cancel" from within the callback function. + * + * @param us + * The time in microseconds before the callback is called + * @param cb + * The function to be called when the alarm expires + * @param cb_arg + * Pointer parameter to be passed to the callback function + * + * @return + * On success, zero. + * On failure, a negative error number + */ +int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg); + +/** + * Function to cancel an alarm callback which has been registered before. If + * used outside alarm callback it wait for all callbacks to finish execution. + * + * @param cb_fn + * alarm callback + * @param cb_arg + * Pointer parameter to be passed to the callback function. To remove all + * copies of a given callback function, irrespective of parameter, (void *)-1 + * can be used here. + * + * @return + * - value greater than 0 and rte_errno not changed - returned value is + * the number of canceled alarm callback functions + * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one + * alarm could not be canceled because cancellation was requested from alarm + * callback context. Returned value is the number of succesfuly canceled + * alarm callbacks + * - 0 and rte_errno set to ENOENT - no alarm found + * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback) + */ +int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_ALARM_H_ */ diff --git a/lib/librte_eal/common/include/rte_branch_prediction.h b/lib/librte_eal/common/include/rte_branch_prediction.h new file mode 100644 index 00000000..a6a56d17 --- /dev/null +++ b/lib/librte_eal/common/include/rte_branch_prediction.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Branch Prediction Helpers in RTE + */ + +#ifndef _RTE_BRANCH_PREDICTION_H_ +#define _RTE_BRANCH_PREDICTION_H_ + +/** + * Check if a branch is likely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * likely to be taken. Example: + * + * if (likely(x > 1)) + * do_stuff(); + * + */ +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif /* likely */ + +/** + * Check if a branch is unlikely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * unlikely to be taken. Example: + * + * if (unlikely(x < 1)) + * do_stuff(); + * + */ +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif /* unlikely */ + +#endif /* _RTE_BRANCH_PREDICTION_H_ */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h new file mode 100644 index 00000000..332f2a43 --- /dev/null +++ b/lib/librte_eal/common/include/rte_common.h @@ -0,0 +1,401 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMMON_H_ +#define _RTE_COMMON_H_ + +/** + * @file + * + * Generic, commonly-used macro and inline function definitions + * for DPDK. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +#ifndef typeof +#define typeof __typeof__ +#endif + +#ifndef asm +#define asm __asm__ +#endif + +#ifdef RTE_ARCH_STRICT_ALIGN +typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); +typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); +typedef uint16_t unaligned_uint16_t __attribute__ ((aligned(1))); +#else +typedef uint64_t unaligned_uint64_t; +typedef uint32_t unaligned_uint32_t; +typedef uint16_t unaligned_uint16_t; +#endif + +/** + * Force alignment + */ +#define __rte_aligned(a) __attribute__((__aligned__(a))) + +/** + * Force a structure to be packed + */ +#define __rte_packed __attribute__((__packed__)) + +/******* Macro to mark functions and fields scheduled for removal *****/ +#define __rte_deprecated __attribute__((__deprecated__)) + +/*********** Macros to eliminate unused variable warnings ********/ + +/** + * short definition to mark a function parameter unused + */ +#define __rte_unused __attribute__((__unused__)) + +/** + * definition to mark a variable or function parameter as used so + * as to avoid a compiler warning + */ +#define RTE_SET_USED(x) (void)(x) + +/*********** Macros for pointer arithmetic ********/ + +/** + * add a byte-value offset from a pointer + */ +#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) + +/** + * subtract a byte-value offset from a pointer + */ +#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x))) + +/** + * get the difference between two pointer values, i.e. how far apart + * in bytes are the locations they point two. It is assumed that + * ptr1 is greater than ptr2. + */ +#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2)) + +/*********** Macros/static functions for doing alignment ********/ + + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no higher than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_FLOOR(ptr, align) \ + ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align)) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no + * bigger than the first parameter. Second parameter must be a + * power-of-two value. + */ +#define RTE_ALIGN_FLOOR(val, align) \ + (typeof(val))((val) & (~((typeof(val))((align) - 1)))) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_CEIL(ptr, align) \ + RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no lower + * than the first parameter. Second parameter must be a power-of-two + * value. + */ +#define RTE_ALIGN_CEIL(val, align) \ + RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_PTR_ALIGN_CEIL + */ +#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align) + +/** + * Macro to align a value to a given power-of-two. The resultant + * value will be of the same type as the first parameter, and + * will be no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_ALIGN_CEIL + */ +#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align) + +/** + * Checks if a pointer is aligned to a given power-of-two value + * + * @param ptr + * The pointer whose alignment is to be checked + * @param align + * The power-of-two value to which the ptr should be aligned + * + * @return + * True(1) where the pointer is correctly aligned, false(0) otherwise + */ +static inline int +rte_is_aligned(void *ptr, unsigned align) +{ + return RTE_PTR_ALIGN(ptr, align) == ptr; +} + +/*********** Macros for compile type checks ********/ + +/** + * Triggers an error at compilation time if the condition is true. + */ +#ifndef __OPTIMIZE__ +#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int RTE_BUILD_BUG_ON_detected_error; +#define RTE_BUILD_BUG_ON(condition) do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) \ + RTE_BUILD_BUG_ON_detected_error = 1; \ +} while(0) +#endif + +/*********** Macros to work with powers of 2 ********/ + +/** + * Returns true if n is a power of 2 + * @param n + * Number to check + * @return 1 if true, 0 otherwise + */ +static inline int +rte_is_power_of_2(uint32_t n) +{ + return n && !(n & (n - 1)); +} + +/** + * Aligns input parameter to the next power of 2 + * + * @param x + * The integer value to algin + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint32_t +rte_align32pow2(uint32_t x) +{ + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return x + 1; +} + +/** + * Aligns 64b input parameter to the next power of 2 + * + * @param v + * The 64b value to align + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint64_t +rte_align64pow2(uint64_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return v + 1; +} + +/*********** Macros for calculating min and max **********/ + +/** + * Macro to return the minimum of two numbers + */ +#define RTE_MIN(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a < _b ? _a : _b; \ + }) + +/** + * Macro to return the maximum of two numbers + */ +#define RTE_MAX(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a > _b ? _a : _b; \ + }) + +/*********** Other general functions / macros ********/ + +#ifdef __SSE2__ +#include +/** + * PAUSE instruction for tight loops (avoid busy waiting) + */ +static inline void +rte_pause (void) +{ + _mm_pause(); +} +#else +static inline void +rte_pause(void) {} +#endif + +/** + * Searches the input parameter for the least significant set bit + * (starting from zero). + * If a least significant 1 bit is found, its bit index is returned. + * If the content of the input parameter is zero, then the content of the return + * value is undefined. + * @param v + * input parameter, should not be zero. + * @return + * least significant set bit in the input parameter. + */ +static inline uint32_t +rte_bsf32(uint32_t v) +{ + return __builtin_ctz(v); +} + +#ifndef offsetof +/** Return the offset of a field in a structure. */ +#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) +#endif + +#define _RTE_STR(x) #x +/** Take a macro value and get a string version of it */ +#define RTE_STR(x) _RTE_STR(x) + +/** Mask value of type "tp" for the first "ln" bit set. */ +#define RTE_LEN2MASK(ln, tp) \ + ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) + +/** Number of elements in the array. */ +#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0])) + +/** + * Converts a numeric string to the equivalent uint64_t value. + * As well as straight number conversion, also recognises the suffixes + * k, m and g for kilobytes, megabytes and gigabytes respectively. + * + * If a negative number is passed in i.e. a string with the first non-black + * character being "-", zero is returned. Zero is also returned in the case of + * an error with the strtoull call in the function. + * + * @param str + * String containing number to convert. + * @return + * Number. + */ +static inline uint64_t +rte_str_to_size(const char *str) +{ + char *endptr; + unsigned long long size; + + while (isspace((int)*str)) + str++; + if (*str == '-') + return 0; + + errno = 0; + size = strtoull(str, &endptr, 0); + if (errno) + return 0; + + if (*endptr == ' ') + endptr++; /* allow 1 space gap */ + + switch (*endptr){ + case 'G': case 'g': size *= 1024; /* fall-through */ + case 'M': case 'm': size *= 1024; /* fall-through */ + case 'K': case 'k': size *= 1024; /* fall-through */ + default: + break; + } + return size; +} + +/** + * Function to terminate the application immediately, printing an error + * message and returning the exit_code back to the shell. + * + * This function never returns + * + * @param exit_code + * The exit code to be returned by the application + * @param format + * The format string to be used for printing the message. This can include + * printf format characters which will be expanded using any further parameters + * to the function. + */ +void +rte_exit(int exit_code, const char *format, ...) + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h new file mode 100644 index 00000000..94129fab --- /dev/null +++ b/lib/librte_eal/common/include/rte_debug.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEBUG_H_ +#define _RTE_DEBUG_H_ + +/** + * @file + * + * Debug Functions in RTE + * + * This file defines a generic API for debug operations. Part of + * the implementation is architecture-specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Dump the stack of the calling core to the console. + */ +void rte_dump_stack(void); + +/** + * Dump the registers of the calling core to the console. + * + * Note: Not implemented in a userapp environment; use gdb instead. + */ +void rte_dump_registers(void); + +/** + * Provide notification of a critical non-recoverable error and terminate + * execution abnormally. + * + * Display the format string and its expanded arguments (printf-like). + * + * In a linuxapp environment, this function dumps the stack and calls + * abort() resulting in a core dump if enabled. + * + * The function never returns. + * + * @param ... + * The format string, followed by the variable list of arguments. + */ +#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") +#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) + +#define RTE_VERIFY(exp) do { \ + if (!(exp)) \ + rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ +} while (0) + +/* + * Provide notification of a critical non-recoverable error and stop. + * + * This function should not be called directly. Refer to rte_panic() macro + * documentation. + */ +void __rte_panic(const char *funcname , const char *format, ...) +#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEBUG_H_ */ diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h new file mode 100644 index 00000000..f1b55079 --- /dev/null +++ b/lib/librte_eal/common/include/rte_dev.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEV_H_ +#define _RTE_DEV_H_ + +/** + * @file + * + * RTE PMD Driver Registration Interface + * + * This file manages the list of device drivers. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include + +__attribute__((format(printf, 2, 0))) +static inline void +rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; + + va_end(ap); + + va_start(ap, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, ap); + va_end(ap); + + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); +} + +/* Macros for checking for restricting functions to primary instance only */ +#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_PROC_PRIMARY_OR_RET() do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return; \ + } \ +} while (0) + +/* Macros to check for invalid function pointers */ +#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_FUNC_PTR_OR_RET(func) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return; \ + } \ +} while (0) + + +/** Double linked list of device drivers. */ +TAILQ_HEAD(rte_driver_list, rte_driver); + +/** + * Initialization function called for each device driver once. + */ +typedef int (rte_dev_init_t)(const char *name, const char *args); + +/** + * Uninitilization function called for each device driver once. + */ +typedef int (rte_dev_uninit_t)(const char *name); + +/** + * Driver type enumeration + */ +enum pmd_type { + PMD_VDEV = 0, + PMD_PDEV = 1, +}; + +/** + * A structure describing a device driver. + */ +struct rte_driver { + TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ + enum pmd_type type; /**< PMD Driver type */ + const char *name; /**< Driver name. */ + rte_dev_init_t *init; /**< Device init. function. */ + rte_dev_uninit_t *uninit; /**< Device uninit. function. */ +}; + +/** + * Register a device driver. + * + * @param driver + * A pointer to a rte_dev structure describing the driver + * to be registered. + */ +void rte_eal_driver_register(struct rte_driver *driver); + +/** + * Unregister a device driver. + * + * @param driver + * A pointer to a rte_dev structure describing the driver + * to be unregistered. + */ +void rte_eal_driver_unregister(struct rte_driver *driver); + +/** + * Initalize all the registered drivers in this process + */ +int rte_eal_dev_init(void); + +/** + * Initialize a driver specified by name. + * + * @param name + * The pointer to a driver name to be initialized. + * @param args + * The pointer to arguments used by driver initialization. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_init(const char *name, const char *args); + +/** + * Uninitalize a driver specified by name. + * + * @param name + * The pointer to a driver name to be initialized. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_uninit(const char *name); + +#define PMD_REGISTER_DRIVER(d)\ +void devinitfn_ ##d(void);\ +void __attribute__((constructor, used)) devinitfn_ ##d(void)\ +{\ + rte_eal_driver_register(&d);\ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VDEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h new file mode 100644 index 00000000..53c59f56 --- /dev/null +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEVARGS_H_ +#define _RTE_DEVARGS_H_ + +/** + * @file + * + * RTE devargs: list of devices and their user arguments + * + * This file stores a list of devices and their arguments given by + * the user when a DPDK application is started. These devices can be PCI + * devices or virtual devices. These devices are stored at startup in a + * list of rte_devargs structures. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * Type of generic device + */ +enum rte_devtype { + RTE_DEVTYPE_WHITELISTED_PCI, + RTE_DEVTYPE_BLACKLISTED_PCI, + RTE_DEVTYPE_VIRTUAL, +}; + +/** + * Structure that stores a device given by the user with its arguments + * + * A user device is a physical or a virtual device given by the user to + * the DPDK application at startup through command line arguments. + * + * The structure stores the configuration of the device, its PCI + * identifier if it's a PCI device or the driver name if it's a virtual + * device. + */ +struct rte_devargs { + /** Next in list. */ + TAILQ_ENTRY(rte_devargs) next; + /** Type of device. */ + enum rte_devtype type; + union { + /** Used if type is RTE_DEVTYPE_*_PCI. */ + struct { + /** PCI location. */ + struct rte_pci_addr addr; + } pci; + /** Used if type is RTE_DEVTYPE_VIRTUAL. */ + struct { + /** Driver name. */ + char drv_name[32]; + } virt; + }; + /** Arguments string as given by user or "" for no argument. */ + char *args; +}; + +/** user device double-linked queue type definition */ +TAILQ_HEAD(rte_devargs_list, rte_devargs); + +/** Global list of user devices */ +extern struct rte_devargs_list devargs_list; + +/** + * Parse a devargs string. + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * + * The function parses the arguments string to get driver name and driver + * arguments. + * + * @param devargs_str + * The arguments as given by the user. + * @param drvname + * The pointer to the string to store parsed driver name. + * @param drvargs + * The pointer to the string to store parsed driver arguments. + * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_parse_devargs_str(const char *devargs_str, + char **drvname, char **drvargs); + +/** + * Add a device to the user device list + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * driver name is not checked by this function, it is done when probing + * the drivers. + * + * @param devtype + * The type of the device. + * @param devargs_str + * The arguments as given by the user. + * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); + +/** + * Count the number of user devices of a specified type + * + * @param devtype + * The type of the devices to counted. + * + * @return + * The number of devices. + */ +unsigned int +rte_eal_devargs_type_count(enum rte_devtype devtype); + +/** + * This function dumps the list of user device and their arguments. + * + * @param f + * A pointer to a file for output + */ +void rte_eal_devargs_dump(FILE *f); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEVARGS_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h new file mode 100644 index 00000000..a71d6f57 --- /dev/null +++ b/lib/librte_eal/common/include/rte_eal.h @@ -0,0 +1,259 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_H_ +#define _RTE_EAL_H_ + +/** + * @file + * + * EAL Configuration API + */ + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */ + +/* Maximum thread_name length. */ +#define RTE_MAX_THREAD_NAME_LEN 16 + +/** + * The lcore role (used in RTE or not). + */ +enum rte_lcore_role_t { + ROLE_RTE, + ROLE_OFF, +}; + +/** + * The type of process in a linuxapp, multi-process setup + */ +enum rte_proc_type_t { + RTE_PROC_AUTO = -1, /* allow auto-detection of primary/secondary */ + RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */ + RTE_PROC_SECONDARY, + + RTE_PROC_INVALID +}; + +/** + * The global RTE configuration structure. + */ +struct rte_config { + uint32_t master_lcore; /**< Id of the master lcore */ + uint32_t lcore_count; /**< Number of available logical cores. */ + enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ + + /** Primary or secondary configuration */ + enum rte_proc_type_t process_type; + + /** + * Pointer to memory configuration, which may be shared across multiple + * DPDK instances + */ + struct rte_mem_config *mem_config; +} __attribute__((__packed__)); + +/** + * Get the global configuration structure. + * + * @return + * A pointer to the global configuration structure. + */ +struct rte_config *rte_eal_get_configuration(void); + +/** + * Get a lcore's role. + * + * @param lcore_id + * The identifier of the lcore. + * @return + * The role of the lcore. + */ +enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id); + + +/** + * Get the process type in a multi-process setup + * + * @return + * The process type + */ +enum rte_proc_type_t rte_eal_process_type(void); + +/** + * Request iopl privilege for all RPL. + * + * This function should be called by pmds which need access to ioports. + + * @return + * - On success, returns 0. + * - On failure, returns -1. + */ +int rte_eal_iopl_init(void); + +/** + * Initialize the Environment Abstraction Layer (EAL). + * + * This function is to be executed on the MASTER lcore only, as soon + * as possible in the application's main() function. + * + * The function finishes the initialization process before main() is called. + * It puts the SLAVE lcores in the WAIT state. + * + * When the multi-partition feature is supported, depending on the + * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this + * function waits to ensure that the magic number is set before + * returning. See also the rte_eal_get_configuration() function. Note: + * This behavior may change in the future. + * + * @param argc + * The argc argument that was given to the main() function. + * @param argv + * The argv argument that was given to the main() function. + * @return + * - On success, the number of parsed arguments, which is greater or + * equal to zero. After the call to rte_eal_init(), + * all arguments argv[x] with x < ret may be modified and should + * not be accessed by the application. + * - On failure, a negative error value. + */ +int rte_eal_init(int argc, char **argv); + +/** + * Check if a primary process is currently alive + * + * This function returns true when a primary process is currently + * active. + * + * @param config_file_path + * The config_file_path argument provided should point at the location + * that the primary process will create its config file. If NULL, the default + * config file path is used. + * + * @return + * - If alive, returns 1. + * - If dead, returns 0. + */ +int rte_eal_primary_proc_alive(const char *config_file_path); + +/** + * Usage function typedef used by the application usage function. + * + * Use this function typedef to define and call rte_set_applcation_usage_hook() + * routine. + */ +typedef void (*rte_usage_hook_t)(const char * prgname); + +/** + * Add application usage routine callout from the eal_usage() routine. + * + * This function allows the application to include its usage message + * in the EAL system usage message. The routine rte_set_application_usage_hook() + * needs to be called before the rte_eal_init() routine in the application. + * + * This routine is optional for the application and will behave as if the set + * routine was never called as the default behavior. + * + * @param usage_func + * The func argument is a function pointer to the application usage routine. + * Called function is defined using rte_usage_hook_t typedef, which is of + * the form void rte_usage_func(const char * prgname). + * + * Calling this routine with a NULL value will reset the usage hook routine and + * return the current value, which could be NULL. + * @return + * - Returns the current value of the rte_application_usage pointer to allow + * the caller to daisy chain the usage routines if needing more then one. + */ +rte_usage_hook_t +rte_set_application_usage_hook(rte_usage_hook_t usage_func); + +/** + * macro to get the lock of tailq in mem_config + */ +#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock) + +/** + * macro to get the multiple lock of mempool shared by mutiple-instance + */ +#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock) + +/** + * Whether EAL is using huge pages (disabled by --no-huge option). + * The no-huge mode cannot be used with UIO poll-mode drivers like igb/ixgbe. + * It is useful for NIC drivers (e.g. librte_pmd_mlx4, librte_pmd_vmxnet3) or + * crypto drivers (e.g. librte_crypto_nitrox) provided by third-parties such + * as 6WIND. + * + * @return + * Nonzero if hugepages are enabled. + */ +int rte_eal_has_hugepages(void); + +/** + * A wrap API for syscall gettid. + * + * @return + * On success, returns the thread ID of calling process. + * It is always successful. + */ +int rte_sys_gettid(void); + +/** + * Get system unique thread id. + * + * @return + * On success, returns the thread ID of calling process. + * It is always successful. + */ +static inline int rte_gettid(void) +{ + static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1; + if (RTE_PER_LCORE(_thread_id) == -1) + RTE_PER_LCORE(_thread_id) = rte_sys_gettid(); + return RTE_PER_LCORE(_thread_id); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EAL_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h new file mode 100644 index 00000000..2b5e0b17 --- /dev/null +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_MEMCONFIG_H_ +#define _RTE_EAL_MEMCONFIG_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * the structure for the memory configuration for the RTE. + * Used by the rte_config structure. It is separated out, as for multi-process + * support, the memory details should be shared across instances + */ +struct rte_mem_config { + volatile uint32_t magic; /**< Magic number - Sanity check. */ + + /* memory topology */ + uint32_t nchannel; /**< Number of channels (0 if unknown). */ + uint32_t nrank; /**< Number of ranks (0 if unknown). */ + + /** + * current lock nest order + * - qlock->mlock (ring/hash/lpm) + * - mplock->qlock->mlock (mempool) + * Notice: + * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock + */ + rte_rwlock_t mlock; /**< only used by memzone LIB for thread-safe. */ + rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ + rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ + + uint32_t memzone_cnt; /**< Number of allocated memzones */ + + /* memory segments and zones */ + struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */ + struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + + struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ + + /* Heaps of Malloc per socket */ + struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES]; + + /* address of mem_config in primary process. used to map shared config into + * exact same address the primary process maps it. + */ + uint64_t mem_cfg_addr; +} __attribute__((__packed__)); + + +inline static void +rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg) +{ + /* wait until shared mem_config finish initialising */ + while(mcfg->magic != RTE_MAGIC) + rte_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif /*__RTE_EAL_MEMCONFIG_H_*/ diff --git a/lib/librte_eal/common/include/rte_errno.h b/lib/librte_eal/common/include/rte_errno.h new file mode 100644 index 00000000..2e5cc454 --- /dev/null +++ b/lib/librte_eal/common/include/rte_errno.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * API for error cause tracking + */ + +#ifndef _RTE_ERRNO_H_ +#define _RTE_ERRNO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */ + +/** + * Error number value, stored per-thread, which can be queried after + * calls to certain functions to determine why those functions failed. + * + * Uses standard values from errno.h wherever possible, with a small number + * of additional possible values for RTE-specific conditions. + */ +#define rte_errno RTE_PER_LCORE(_rte_errno) + +/** + * Function which returns a printable string describing a particular + * error code. For non-RTE-specific error codes, this function returns + * the value from the libc strerror function. + * + * @param errnum + * The error number to be looked up - generally the value of rte_errno + * @return + * A pointer to a thread-local string containing the text describing + * the error. + */ +const char *rte_strerror(int errnum); + +#ifndef __ELASTERROR +/** + * Check if we have a defined value for the max system-defined errno values. + * if no max defined, start from 1000 to prevent overlap with standard values + */ +#define __ELASTERROR 1000 +#endif + +/** Error types */ +enum { + RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */ + + E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */ + E_RTE_NO_CONFIG, /**< Missing rte_config */ + + RTE_MAX_ERRNO /**< Max RTE error number */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ERRNO_H_ */ diff --git a/lib/librte_eal/common/include/rte_hexdump.h b/lib/librte_eal/common/include/rte_hexdump.h new file mode 100644 index 00000000..5c18a50b --- /dev/null +++ b/lib/librte_eal/common/include/rte_hexdump.h @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HEXDUMP_H_ +#define _RTE_HEXDUMP_H_ + +/** + * @file + * Simple API to dump out memory in a special hex format. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* Dump out memory in a special hex dump format. +* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +extern void +rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len); + +/** +* Dump out memory in a hex format with colons between bytes. +* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +void +rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HEXDUMP_H_ */ diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h new file mode 100644 index 00000000..ff11ef3a --- /dev/null +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#define _RTE_INTERRUPTS_H_ + +/** + * @file + * + * The RTE interrupt interface provides functions to register/unregister + * callbacks for a specific interrupt. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Interrupt handle */ +struct rte_intr_handle; + +/** Function to be registered for the specific interrupt */ +typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, + void *cb_arg); + +#include + +/** + * It registers the callback for the specific interrupt. Multiple + * callbacks cal be registered at the same time. + * @param intr_handle + * Pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It unregisters the callback according to the specified interrupt handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback, (void *)-1 means to remove all + * registered which has the same callback address. + * + * @return + * - On success, return the number of callback entities removed. + * - On failure, a negative value. + */ +int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It enables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_enable(struct rte_intr_handle *intr_handle); + +/** + * It disables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_disable(struct rte_intr_handle *intr_handle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h new file mode 100644 index 00000000..10dac2e0 --- /dev/null +++ b/lib/librte_eal/common/include/rte_keepalive.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 Intel Shannon Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file rte_keepalive.h + * DPDK RTE LCore Keepalive Monitor. + * + **/ + +#ifndef _KEEPALIVE_H_ +#define _KEEPALIVE_H_ + +#include + +#ifndef RTE_KEEPALIVE_MAXCORES +/** + * Number of cores to track. + * @note Must be larger than the highest core id. */ +#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE +#endif + +/** + * Keepalive failure callback. + * + * Receives a data pointer passed to rte_keepalive_create() and the id of the + * failed core. + */ +typedef void (*rte_keepalive_failure_callback_t)( + void *data, + const int id_core); + +/** + * Keepalive state structure. + * @internal + */ +struct rte_keepalive; + +/** + * Initialise keepalive sub-system. + * @param callback + * Function called upon detection of a dead core. + * @param data + * Data pointer to be passed to function callback. + * @return + * Keepalive structure success, NULL on failure. + */ +struct rte_keepalive *rte_keepalive_create( + rte_keepalive_failure_callback_t callback, + void *data); + +/** + * Checks & handles keepalive state of monitored cores. + * @param *ptr_timer Triggering timer (unused) + * @param *ptr_data Data pointer (keepalive structure) + */ +void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data); + +/** + * Registers a core for keepalive checks. + * @param *keepcfg + * Keepalive structure pointer + * @param id_core + * ID number of core to register. + */ +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, + const int id_core); + +/** + * Per-core keepalive check. + * @param *keepcfg + * Keepalive structure pointer + * + * This function needs to be called from within the main process loop of + * the LCore to be checked. + */ +void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg); + +#endif /* _KEEPALIVE_H_ */ diff --git a/lib/librte_eal/common/include/rte_launch.h b/lib/librte_eal/common/include/rte_launch.h new file mode 100644 index 00000000..dd1946da --- /dev/null +++ b/lib/librte_eal/common/include/rte_launch.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LAUNCH_H_ +#define _RTE_LAUNCH_H_ + +/** + * @file + * + * Launch tasks on other lcores + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * State of an lcore. + */ +enum rte_lcore_state_t { + WAIT, /**< waiting a new command */ + RUNNING, /**< executing command */ + FINISHED, /**< command executed */ +}; + +/** + * Definition of a remote launch function. + */ +typedef int (lcore_function_t)(void *); + +/** + * Launch a function on another lcore. + * + * To be executed on the MASTER lcore only. + * + * Sends a message to a slave lcore (identified by the slave_id) that + * is in the WAIT state (this is true after the first call to + * rte_eal_init()). This can be checked by first calling + * rte_eal_wait_lcore(slave_id). + * + * When the remote lcore receives the message, it switches to + * the RUNNING state, then calls the function f with argument arg. Once the + * execution is done, the remote lcore switches to a FINISHED state and + * the return value of f is stored in a local variable to be read using + * rte_eal_wait_lcore(). + * + * The MASTER lcore returns as soon as the message is sent and knows + * nothing about the completion of f. + * + * Note: This function is not designed to offer optimum + * performance. It is just a practical way to launch a function on + * another lcore at initialization time. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param slave_id + * The identifier of the lcore on which the function should be executed. + * @return + * - 0: Success. Execution of function f started on the remote lcore. + * - (-EBUSY): The remote lcore is not in a WAIT state. + */ +int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id); + +/** + * This enum indicates whether the master core must execute the handler + * launched on all logical cores. + */ +enum rte_rmt_call_master_t { + SKIP_MASTER = 0, /**< lcore handler not executed by master core. */ + CALL_MASTER, /**< lcore handler executed by master core. */ +}; + +/** + * Launch a function on all lcores. + * + * Check that each SLAVE lcore is in a WAIT state, then call + * rte_eal_remote_launch() for each lcore. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param call_master + * If call_master set to SKIP_MASTER, the MASTER lcore does not call + * the function. If call_master is set to CALL_MASTER, the function + * is also called on master before returning. In any case, the master + * lcore returns as soon as it finished its job and knows nothing + * about the completion of f on the other lcores. + * @return + * - 0: Success. Execution of function f started on all remote lcores. + * - (-EBUSY): At least one remote lcore is not in a WAIT state. In this + * case, no message is sent to any of the lcores. + */ +int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg, + enum rte_rmt_call_master_t call_master); + +/** + * Get the state of the lcore identified by slave_id. + * + * To be executed on the MASTER lcore only. + * + * @param slave_id + * The identifier of the lcore. + * @return + * The state of the lcore. + */ +enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id); + +/** + * Wait until an lcore finishes its job. + * + * To be executed on the MASTER lcore only. + * + * If the slave lcore identified by the slave_id is in a FINISHED state, + * switch to the WAIT state. If the lcore is in RUNNING state, wait until + * the lcore finishes its job and moves to the FINISHED state. + * + * @param slave_id + * The identifier of the lcore. + * @return + * - 0: If the lcore identified by the slave_id is in a WAIT state. + * - The value that was returned by the previous remote launch + * function call if the lcore identified by the slave_id was in a + * FINISHED or RUNNING state. In this case, it changes the state + * of the lcore to WAIT. + */ +int rte_eal_wait_lcore(unsigned slave_id); + +/** + * Wait until all lcores finish their jobs. + * + * To be executed on the MASTER lcore only. Issue an + * rte_eal_wait_lcore() for every lcore. The return values are + * ignored. + * + * After a call to rte_eal_mp_wait_lcore(), the caller can assume + * that all slave lcores are in a WAIT state. + */ +void rte_eal_mp_wait_lcore(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LAUNCH_H_ */ diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h new file mode 100644 index 00000000..ac151302 --- /dev/null +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -0,0 +1,276 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LCORE_H_ +#define _RTE_LCORE_H_ + +/** + * @file + * + * API for lcore and socket manipulation + * + */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LCORE_ID_ANY UINT32_MAX /**< Any lcore. */ + +#if defined(__linux__) + typedef cpu_set_t rte_cpuset_t; +#elif defined(__FreeBSD__) +#include + typedef cpuset_t rte_cpuset_t; +#endif + +/** + * Structure storing internal configuration (per-lcore) + */ +struct lcore_config { + unsigned detected; /**< true if lcore was detected */ + pthread_t thread_id; /**< pthread identifier */ + int pipe_master2slave[2]; /**< communication pipe with master */ + int pipe_slave2master[2]; /**< communication pipe with master */ + lcore_function_t * volatile f; /**< function to call */ + void * volatile arg; /**< argument of function */ + volatile int ret; /**< return value of function */ + volatile enum rte_lcore_state_t state; /**< lcore state */ + unsigned socket_id; /**< physical socket id for this lcore */ + unsigned core_id; /**< core number on socket for this lcore */ + int core_index; /**< relative index, starting from 0 */ + rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */ +}; + +/** + * Internal configuration (per-lcore) + */ +extern struct lcore_config lcore_config[RTE_MAX_LCORE]; + +RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per thread "lcore id". */ +RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */ + +/** + * Return the ID of the execution unit we are running on. + * @return + * Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread) + */ +static inline unsigned +rte_lcore_id(void) +{ + return RTE_PER_LCORE(_lcore_id); +} + +/** + * Get the id of the master lcore + * + * @return + * the id of the master lcore + */ +static inline unsigned +rte_get_master_lcore(void) +{ + return rte_eal_get_configuration()->master_lcore; +} + +/** + * Return the number of execution units (lcores) on the system. + * + * @return + * the number of execution units (lcores) on the system. + */ +static inline unsigned +rte_lcore_count(void) +{ + const struct rte_config *cfg = rte_eal_get_configuration(); + return cfg->lcore_count; +} + +/** + * Return the index of the lcore starting from zero. + * The order is physical or given by command line (-l option). + * + * @param lcore_id + * The targeted lcore, or -1 for the current one. + * @return + * The relative index, or -1 if not enabled. + */ +static inline int +rte_lcore_index(int lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) + return -1; + if (lcore_id < 0) + lcore_id = rte_lcore_id(); + return lcore_config[lcore_id].core_index; +} + +/** + * Return the ID of the physical socket of the logical core we are + * running on. + * @return + * the ID of current lcoreid's physical socket + */ +unsigned rte_socket_id(void); + +/** + * Get the ID of the physical socket of the specified lcore + * + * @param lcore_id + * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1. + * @return + * the ID of lcoreid's physical socket + */ +static inline unsigned +rte_lcore_to_socket_id(unsigned lcore_id) +{ + return lcore_config[lcore_id].socket_id; +} + +/** + * Test if an lcore is enabled. + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. + * @return + * True if the given lcore is enabled; false otherwise. + */ +static inline int +rte_lcore_is_enabled(unsigned lcore_id) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + if (lcore_id >= RTE_MAX_LCORE) + return 0; + return cfg->lcore_role[lcore_id] != ROLE_OFF; +} + +/** + * Get the next enabled lcore ID. + * + * @param i + * The current lcore (reference). + * @param skip_master + * If true, do not return the ID of the master lcore. + * @param wrap + * If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise, + * return RTE_MAX_LCORE. + * @return + * The next lcore_id or RTE_MAX_LCORE if not found. + */ +static inline unsigned +rte_get_next_lcore(unsigned i, int skip_master, int wrap) +{ + i++; + if (wrap) + i %= RTE_MAX_LCORE; + + while (i < RTE_MAX_LCORE) { + if (!rte_lcore_is_enabled(i) || + (skip_master && (i == rte_get_master_lcore()))) { + i++; + if (wrap) + i %= RTE_MAX_LCORE; + continue; + } + break; + } + return i; +} +/** + * Macro to browse all running lcores. + */ +#define RTE_LCORE_FOREACH(i) \ + for (i = rte_get_next_lcore(-1, 0, 0); \ + i= 2.12 supports this feature. + * + * This macro only used for Linux, BSD does direct libc call. + * BSD libc version of function is `pthread_set_name_np()`. + */ +#if defined(__DOXYGEN__) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#endif + +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#else +#define rte_thread_setname(...) 0 +#endif +#endif + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h new file mode 100644 index 00000000..2e47e7f6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_log.h @@ -0,0 +1,311 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LOG_H_ +#define _RTE_LOG_H_ + +/** + * @file + * + * RTE Logs API + * + * This file provides a log API to RTE applications. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** The rte_log structure. */ +struct rte_logs { + uint32_t type; /**< Bitfield with enabled logs. */ + uint32_t level; /**< Log level. */ + FILE *file; /**< Pointer to current FILE* for logs. */ +}; + +/** Global log informations */ +extern struct rte_logs rte_logs; + +/* SDK log type */ +#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */ +#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */ +#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */ +#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */ +#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */ +#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */ +#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */ +#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */ +#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */ +#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */ +#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ +#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ +#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */ +#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */ +#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ +#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ +#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ + +/* these log types can be used in an application */ +#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ +#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */ +#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */ +#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. */ +#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */ +#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */ +#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */ +#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */ + +/* Can't use 0, as it gives compiler warnings */ +#define RTE_LOG_EMERG 1U /**< System is unusable. */ +#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */ +#define RTE_LOG_CRIT 3U /**< Critical conditions. */ +#define RTE_LOG_ERR 4U /**< Error conditions. */ +#define RTE_LOG_WARNING 5U /**< Warning conditions. */ +#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */ +#define RTE_LOG_INFO 7U /**< Informational. */ +#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ + +/** The default log stream. */ +extern FILE *eal_default_log_stream; + +/** + * Change the stream that will be used by the logging system. + * + * This can be done at any time. The f argument represents the stream + * to be used to send the logs. If f is NULL, the default output is + * used (stderr). + * + * @param f + * Pointer to the stream. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_openlog_stream(FILE *f); + +/** + * Set the global log level. + * + * After this call, all logs that are lower or equal than level and + * lower or equal than the RTE_LOG_LEVEL configuration option will be + * displayed. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + */ +void rte_set_log_level(uint32_t level); + +/** + * Get the global log level. + */ +uint32_t rte_get_log_level(void); + +/** + * Enable or disable the log type. + * + * @param type + * Log type, for example, RTE_LOGTYPE_EAL. + * @param enable + * True for enable; false for disable. + */ +void rte_set_log_type(uint32_t type, int enable); + +/** + * Get the global log type. + */ +uint32_t rte_get_log_type(void); + +/** + * Get the current loglevel for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. + * + * @return + * The loglevel of the message being processed. + */ +int rte_log_cur_msg_loglevel(void); + +/** + * Get the current logtype for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. + * + * @return + * The logtype of the message being processed. + */ +int rte_log_cur_msg_logtype(void); + +/** + * Enable or disable the history (enabled by default) + * + * @param enable + * true to enable, or 0 to disable history. + */ +void rte_log_set_history(int enable); + +/** + * Dump the log history to a file + * + * @param f + * A pointer to a file for output + */ +void rte_log_dump_history(FILE *f); + +/** + * Add a log message to the history. + * + * This function can be called from a user-defined log stream. It adds + * the given message in the history that can be dumped using + * rte_log_dump_history(). + * + * @param buf + * A data buffer containing the message to be saved in the history. + * @param size + * The length of the data buffer. + * @return + * - 0: Success. + * - (-ENOBUFS) if there is no room to store the message. + */ +int rte_log_add_in_history(const char *buf, size_t size); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. + * + * The preferred alternative is the RTE_LOG() function because debug logs may + * be removed at compilation time if optimization is enabled. Moreover, + * logs are automatically prefixed by type when using the macro. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) +#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((format(printf, 3, 4))); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. A trailing + * newline may be added if needed. + * + * The preferred alternative is the RTE_LOG() because debug logs may be + * removed at compilation time. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @param ap + * The va_list of the variable arguments required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) + __attribute__((format(printf,3,0))); + +/** + * Generates a log message. + * + * The RTE_LOG() is equivalent to rte_log() with two differences: + + * - RTE_LOG() can be used to remove debug logs at compilation time, + * depending on RTE_LOG_LEVEL configuration option, and compilation + * optimization level. If optimization is enabled, the tests + * involving constants only are pre-computed. If compilation is done + * with -O0, these tests will be done at run time. + * - The log level and log type names are smaller, for example: + * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... + * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ + 0) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LOG_H_ */ diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h new file mode 100644 index 00000000..74bb78c7 --- /dev/null +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_H_ +#define _RTE_MALLOC_H_ + +/** + * @file + * RTE Malloc. This library provides methods for dynamically allocating memory + * from hugepages. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function. + */ +struct rte_malloc_socket_stats { + size_t heap_totalsz_bytes; /**< Total bytes on heap */ + size_t heap_freesz_bytes; /**< Total free bytes on heap */ + size_t greatest_free_size; /**< Size in bytes of largest free block */ + unsigned free_count; /**< Number of free elements on heap */ + unsigned alloc_count; /**< Number of allocated elements on heap */ + size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */ +}; + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. In NUMA systems, the memory allocated resides on the same + * NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align); + +/** + * Replacement function for realloc(), using huge-page memory. Reserved area + * memory is resized, preserving contents. In NUMA systems, the new area + * resides on the same NUMA socket as the old area. + * + * @param ptr + * Pointer to already allocated memory + * @param size + * Size (in bytes) of new area. If this is 0, memory is freed. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the reallocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align); + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_malloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_zmalloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_calloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket); + +/** + * Frees the memory space pointed to by the provided pointer. + * + * This pointer must have been returned by a previous call to + * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of + * rte_free() is undefined if the pointer does not match this requirement. + * + * If the pointer is NULL, the function does nothing. + * + * @param ptr + * The pointer to memory to be freed. + */ +void +rte_free(void *ptr); + +/** + * If malloc debug is enabled, check a memory block for header + * and trailer markers to indicate that all is well with the block. + * If size is non-null, also return the size of the block. + * + * @param ptr + * pointer to the start of a data block, must have been returned + * by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc() + * or rte_realloc() + * @param size + * if non-null, and memory block pointer is valid, returns the size + * of the memory block + * @return + * -1 on error, invalid pointer passed or header and trailer markers + * are missing or corrupted + * 0 on success + */ +int +rte_malloc_validate(const void *ptr, size_t *size); + +/** + * Get heap statistics for the specified heap. + * + * @param socket + * An unsigned integer specifying the socket to get heap statistics for + * @param socket_stats + * A structure which provides memory to store statistics + * @return + * Null on error + * Pointer to structure storing statistics on success + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats); + +/** + * Dump statistics. + * + * Dump for the specified type to the console. If the type argument is + * NULL, all memory types will be dumped. + * + * @param f + * A pointer to a file for output + * @param type + * A string identifying the type of objects to dump, or NULL + * to dump all objects. + */ +void +rte_malloc_dump_stats(FILE *f, const char *type); + +/** + * Set the maximum amount of allocated memory for this type. + * + * This is not yet implemented + * + * @param type + * A string identifying the type of allocated objects. + * @param max + * The maximum amount of allocated bytes for this type. + * @return + * - 0: Success. + * - (-1): Error. + */ +int +rte_malloc_set_limit(const char *type, size_t max); + +/** + * Return the physical address of a virtual address obtained through + * rte_malloc + * + * @param addr + * Adress obtained from a previous rte_malloc call + * @return + * NULL on error + * otherwise return physical address of the buffer + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MALLOC_H_ */ diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h new file mode 100644 index 00000000..b2703562 --- /dev/null +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_HEAP_H_ +#define _RTE_MALLOC_HEAP_H_ + +#include +#include +#include +#include + +/* Number of free lists per heap, grouped by size. */ +#define RTE_HEAP_NUM_FREELISTS 13 + +/** + * Structure to hold malloc heap + */ +struct malloc_heap { + rte_spinlock_t lock; + LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; + unsigned alloc_count; + size_t total_size; +} __rte_cache_aligned; + +#endif /* _RTE_MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h new file mode 100644 index 00000000..f8dbece0 --- /dev/null +++ b/lib/librte_eal/common/include/rte_memory.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMORY_H_ +#define _RTE_MEMORY_H_ + +/** + * @file + * + * Memory-related RTE API. + */ + +#include +#include +#include + +#ifdef RTE_EXEC_ENV_LINUXAPP +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +enum rte_page_sizes { + RTE_PGSIZE_4K = 1ULL << 12, + RTE_PGSIZE_64K = 1ULL << 16, + RTE_PGSIZE_256K = 1ULL << 18, + RTE_PGSIZE_2M = 1ULL << 21, + RTE_PGSIZE_16M = 1ULL << 24, + RTE_PGSIZE_256M = 1ULL << 28, + RTE_PGSIZE_512M = 1ULL << 29, + RTE_PGSIZE_1G = 1ULL << 30, + RTE_PGSIZE_4G = 1ULL << 32, + RTE_PGSIZE_16G = 1ULL << 34, +}; + +#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ +#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask. */ + +#define RTE_CACHE_LINE_ROUNDUP(size) \ + (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE)) +/**< Return the first cache-aligned value greater or equal to size. */ + +/**< Cache line size in terms of log2 */ +#if RTE_CACHE_LINE_SIZE == 64 +#define RTE_CACHE_LINE_SIZE_LOG2 6 +#elif RTE_CACHE_LINE_SIZE == 128 +#define RTE_CACHE_LINE_SIZE_LOG2 7 +#else +#error "Unsupported cache line size" +#endif + +#define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum Cache line size. */ + +/** + * Force alignment to cache line. + */ +#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE) + +/** + * Force minimum cache line alignment. + */ +#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) + +typedef uint64_t phys_addr_t; /**< Physical address definition. */ +#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) + +/** + * Physical memory segment descriptor. + */ +struct rte_memseg { + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64 bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the segment. */ + uint64_t hugepage_sz; /**< The pagesize of underlying memory */ + int32_t socket_id; /**< NUMA socket ID. */ + uint32_t nchannel; /**< Number of channels. */ + uint32_t nrank; /**< Number of ranks. */ +#ifdef RTE_LIBRTE_XEN_DOM0 + /**< store segment MFNs */ + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +#endif +} __rte_packed; + +/** + * Lock page in physical memory and prevent from swapping. + * + * @param virt + * The virtual address. + * @return + * 0 on success, negative on error. + */ +int rte_mem_lock_page(const void *virt); + +/** + * Get physical address of any mapped virtual address in the current process. + * It is found by browsing the /proc/self/pagemap special file. + * The page must be locked. + * + * @param virt + * The virtual address. + * @return + * The physical address or RTE_BAD_PHYS_ADDR on error. + */ +phys_addr_t rte_mem_virt2phy(const void *virt); + +/** + * Get the layout of the available physical memory. + * + * It can be useful for an application to have the full physical + * memory layout to decide the size of a memory zone to reserve. This + * table is stored in rte_config (see rte_eal_get_configuration()). + * + * @return + * - On success, return a pointer to a read-only table of struct + * rte_physmem_desc elements, containing the layout of all + * addressable physical memory. The last element of the table + * contains a NULL address. + * - On error, return NULL. This should not happen since it is a fatal + * error that will probably cause the entire system to panic. + */ +const struct rte_memseg *rte_eal_get_physmem_layout(void); + +/** + * Dump the physical memory layout to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_physmem_layout(FILE *f); + +/** + * Get the total amount of available physical memory. + * + * @return + * The total amount of available physical memory in bytes. + */ +uint64_t rte_eal_get_physmem_size(void); + +/** + * Get the number of memory channels. + * + * @return + * The number of memory channels on the system. The value is 0 if unknown + * or not the same on all devices. + */ +unsigned rte_memory_get_nchannel(void); + +/** + * Get the number of memory ranks. + * + * @return + * The number of memory ranks on the system. The value is 0 if unknown or + * not the same on all devices. + */ +unsigned rte_memory_get_nrank(void); + +#ifdef RTE_LIBRTE_XEN_DOM0 + +/**< Internal use only - should DOM0 memory mapping be used */ +int rte_xen_dom0_supported(void); + +/**< Internal use only - phys to virt mapping for xen */ +phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t); + +/** + * Return the physical address of elt, which is an element of the pool mp. + * + * @param memseg_id + * The mempool is from which memory segment. + * @param phy_addr + * physical address of elt. + * + * @return + * The physical address or error. + */ +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +{ + if (rte_xen_dom0_supported()) + return rte_xen_mem_phy2mch(memseg_id, phy_addr); + else + return phy_addr; +} + +/** + * Memory init for supporting application running on Xen domain0. + * + * @param void + * + * @return + * 0: successfully + * negative: error + */ +int rte_xen_dom0_memory_init(void); + +/** + * Attach to memory setments of primary process on Xen domain0. + * + * @param void + * + * @return + * 0: successfully + * negative: error + */ +int rte_xen_dom0_memory_attach(void); +#else +static inline int rte_xen_dom0_supported(void) +{ + return 0; +} + +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr) +{ + return phy_addr; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMORY_H_ */ diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h new file mode 100644 index 00000000..f69b5a87 --- /dev/null +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMZONE_H_ +#define _RTE_MEMZONE_H_ + +/** + * @file + * RTE Memzone + * + * The goal of the memzone allocator is to reserve contiguous + * portions of physical memory. These zones are identified by a name. + * + * The memzone descriptors are shared by all partitions and are + * located in a known place of physical memory. This zone is accessed + * using rte_eal_get_configuration(). The lookup (by name) of a + * memory zone can be done in any partition and returns the same + * physical address. + * + * A reserved memory zone cannot be unreserved. The reservation shall + * be done at initialization time only. + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */ +#define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */ +#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */ +#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */ +#define RTE_MEMZONE_256KB 0x00010000 /**< Use 256KB pages. */ +#define RTE_MEMZONE_256MB 0x00020000 /**< Use 256MB pages. */ +#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */ +#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */ +#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */ + +/** + * A structure describing a memzone, which is a contiguous portion of + * physical memory identified by a name. + */ +struct rte_memzone { + +#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ + char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ + + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64-bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the memzone. */ + + uint64_t hugepage_sz; /**< The page size of underlying memory */ + + int32_t socket_id; /**< NUMA socket ID. */ + + uint32_t flags; /**< Characteristics of this memzone. */ + uint32_t memseg_id; /**< Memseg it belongs. */ +} __attribute__((__packed__)); + +/** + * Reserve a portion of physical memory. + * + * This function reserves some memory and returns a pointer to a + * correctly filled memzone descriptor. If the allocation cannot be + * done, return NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve(const char *name, + size_t len, int socket_id, + unsigned flags); + +/** + * Reserve a portion of physical memory with alignment on a specified + * boundary. + * + * This function reserves some memory with alignment on a specified + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * is not a power of 2, returns NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align); + +/** + * Reserve a portion of physical memory with specified alignment and + * boundary. + * + * This function reserves some memory with specified alignment and + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * or boundary are not a power of 2, returns NULL. + * Memory buffer is reserved in a way, that it wouldn't cross specified + * boundary. That implies that requested length should be less or equal + * then boundary. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param bound + * Boundary for resulting memzone. Must be a power of 2 or zero. + * Zero value implies no boundary condition. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound); + +/** + * Free a memzone. + * + * Note: an IVSHMEM zone cannot be freed. + * + * @param mz + * A pointer to the memzone + * @return + * -EINVAL - invalid parameter, IVSHMEM memzone. + * 0 - success + */ +int rte_memzone_free(const struct rte_memzone *mz); + +/** + * Lookup for a memzone. + * + * Get a pointer to a descriptor of an already reserved memory + * zone identified by the name given as an argument. + * + * @param name + * The name of the memzone. + * @return + * A pointer to a read-only memzone descriptor. + */ +const struct rte_memzone *rte_memzone_lookup(const char *name); + +/** + * Dump all reserved memzones to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_memzone_dump(FILE *f); + +/** + * Walk list of all memzones + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg), + void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMZONE_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h new file mode 100644 index 00000000..e692094e --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci.h @@ -0,0 +1,598 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_H_ +#define _RTE_PCI_H_ + +/** + * @file + * + * RTE PCI Interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ +TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ + +extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */ +extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ + +/** Pathname of PCI devices directory. */ +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ +#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ +#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Nb. of values in PCI device identifier format string. */ +#define PCI_FMT_NVAL 4 + +/** Nb. of values in PCI resource format. */ +#define PCI_RESOURCE_FMT_NVAL 3 + +/** IO resource type: memory address space */ +#define IORESOURCE_MEM 0x00000200 + +/** + * A structure describing a PCI resource. + */ +struct rte_pci_resource { + uint64_t phys_addr; /**< Physical address, 0 if no resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; + +/** Maximum number of PCI resources. */ +#define PCI_MAX_RESOURCE 6 + +/** + * A structure describing an ID for a PCI driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_pci_id { + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ + uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ +}; + +/** + * A structure describing the location of a PCI device. + */ +struct rte_pci_addr { + uint16_t domain; /**< Device domain */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ +}; + +struct rte_devargs; + +enum rte_kernel_driver { + RTE_KDRV_UNKNOWN = 0, + RTE_KDRV_IGB_UIO, + RTE_KDRV_VFIO, + RTE_KDRV_UIO_GENERIC, + RTE_KDRV_NIC_UIO, + RTE_KDRV_NONE, +}; + +/** + * A structure describing a PCI device. + */ +struct rte_pci_device { + TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_pci_addr addr; /**< PCI location. */ + struct rte_pci_id id; /**< PCI ID. */ + struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_intr_handle intr_handle; /**< Interrupt handle */ + struct rte_pci_driver *driver; /**< Associated driver */ + uint16_t max_vfs; /**< sriov enable if not zero */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ +}; + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + (vend), \ + (dev), \ + PCI_ANY_ID, \ + PCI_ANY_ID +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subsystem_vendor_id = PCI_ANY_ID, \ + .subsystem_device_id = PCI_ANY_ID +#endif + +struct rte_pci_driver; + +/** + * Initialisation function for the driver called during PCI probing. + */ +typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (pci_devuninit_t)(struct rte_pci_device *); + +/** + * A structure describing a PCI driver. + */ +struct rte_pci_driver { + TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ + const char *name; /**< Driver name. */ + pci_devinit_t *devinit; /**< Device init. function. */ + pci_devuninit_t *devuninit; /**< Device uninit function. */ + const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags contolling handling of device. */ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define RTE_PCI_DRV_NEED_MAPPING 0x0001 +/** Device driver must be registered several times until failure - deprecated */ +#pragma GCC poison RTE_PCI_DRV_MULTIPLE +/** Device needs to be unbound even if no module is provided */ +#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 +/** Device driver supports link state interrupt */ +#define RTE_PCI_DRV_INTR_LSC 0x0008 +/** Device driver supports detaching capability */ +#define RTE_PCI_DRV_DETACHABLE 0x0010 + +/** + * A structure describing a PCI mapping. + */ +struct pci_map { + void *addr; + char *path; + uint64_t offset; + uint64_t size; + uint64_t phaddr; +}; + +/** + * A structure describing a mapped PCI resource. + * For multi-process we need to reproduce all PCI mappings in secondary + * processes, so save them in a tailq. + */ +struct mapped_pci_resource { + TAILQ_ENTRY(mapped_pci_resource) next; + + struct rte_pci_addr pci_addr; + char path[PATH_MAX]; + int nb_maps; + struct pci_map maps[PCI_MAX_RESOURCE]; +}; + +/** mapped pci device list */ +TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); + +/**< Internal use only - Macro used by pci addr parsing functions **/ +#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ +do { \ + unsigned long val; \ + char *end; \ + errno = 0; \ + val = strtoul((in), &end, 16); \ + if (errno != 0 || end[0] != (dlm) || val > (lim)) \ + return -EINVAL; \ + (fd) = (typeof (fd))val; \ + (in) = end + 1; \ +} while(0) + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided without + * a domain prefix (i.e. domain returned is always 0) + * + * @param input + * The input string to be parsed. Should have the format XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned. Domain will always be + * returned as 0 + * @return + * 0 on success, negative on error. + */ +static inline int +eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) +{ + dev_addr->domain = 0; + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided including + * a domain prefix. + * + * @param input + * The input string to be parsed. Should have the format XXXX:XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned + * @return + * 0 on success, negative on error. + */ +static inline int +eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) +{ + GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} +#undef GET_PCIADDR_FIELD + +/* Compare two PCI device addresses. */ +/** + * Utility function to compare two PCI device addresses. + * + * @param addr + * The PCI Bus-Device-Function address to compare + * @param addr2 + * The PCI Bus-Device-Function address to compare + * @return + * 0 on equal PCI address. + * Positive on addr is greater than addr2. + * Negative on addr is less than addr2, or error. + */ +static inline int +rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2) +{ + uint64_t dev_addr, dev_addr2; + + if ((addr == NULL) || (addr2 == NULL)) + return -1; + + dev_addr = (addr->domain << 24) | (addr->bus << 16) | + (addr->devid << 8) | addr->function; + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | + (addr2->devid << 8) | addr2->function; + + if (dev_addr > dev_addr2) + return 1; + else if (dev_addr < dev_addr2) + return -1; + else + return 0; +} + +/** + * Scan the content of the PCI bus, and the devices in the devices + * list + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_scan(void); + +/** + * Probe the PCI bus for registered drivers. + * + * Scan the content of the PCI bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_probe(void); + +/** + * Map the PCI device resources in user space virtual memory address + * + * Note that driver should not call this function when flag + * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for + * you when it's on. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +int rte_eal_pci_map_device(struct rte_pci_device *dev); + +/** + * Unmap this device + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void rte_eal_pci_unmap_device(struct rte_pci_device *dev); + +/** + * @internal + * Map a particular resource from a file. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param fd + * The file descriptor. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @param additional_flags + * The additional flags for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, the value MAP_FAILED is returned. + */ +void *pci_map_resource(void *requested_addr, int fd, off_t offset, + size_t size, int additional_flags); + +/** + * @internal + * Unmap a particular resource. + * + * @param requested_addr + * The address for the unmapping range. + * @param size + * The size for the unmapping range. + */ +void pci_unmap_resource(void *requested_addr, size_t size); + +/** + * Probe the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the probe() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to probe. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); + +/** + * Close the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the devuninit() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to close. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_detach(const struct rte_pci_addr *addr); + +/** + * Dump the content of the PCI bus. + * + * @param f + * A pointer to a file for output + */ +void rte_eal_pci_dump(FILE *f); + +/** + * Register a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be registered. + */ +void rte_eal_pci_register(struct rte_pci_driver *driver); + +/** + * Unregister a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_pci_unregister(struct rte_pci_driver *driver); + +/** + * Read PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_eal_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); + +/** + * Write PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_eal_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); + +/** + * A structure used to access io resources for a pci device. + * rte_pci_ioport is arch, os, driver specific, and should not be used outside + * of pci ioport api. + */ +struct rte_pci_ioport { + struct rte_pci_device *dev; + uint64_t base; +}; + +/** + * Initialises a rte_pci_ioport object for a pci device io resource. + * This object is then used to gain access to those io resources (see below). + * + * @param dev + * A pointer to a rte_pci_device structure describing the device. + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param p + * The rte_pci_ioport object to be initialized. + * @return + * 0 on success, negative on error. + */ +int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); + +/** + * Release any resources used in a rte_pci_ioport object. + * + * @param p + * The rte_pci_ioport object to be uninitialized. + */ +int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); + +/** + * Read from a io pci resource. + * + * @param p + * The rte_pci_ioport object from which we want to read. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); + +/** + * Write to a io pci resource. + * + * @param p + * The rte_pci_ioport object to which we want to write. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); + +#ifdef RTE_PCI_CONFIG +#include +/** + * Set special config space registers for performance purpose. + * It is deprecated, as all configurations have been moved into + * each PMDs respectively. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated; +#endif /* RTE_PCI_CONFIG */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h new file mode 100644 index 00000000..08222510 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h @@ -0,0 +1,70 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_DEV_DEFS_H_ +#define _RTE_PCI_DEV_DEFS_H_ + +/* interrupt mode */ +enum rte_intr_mode { + RTE_INTR_MODE_NONE = 0, + RTE_INTR_MODE_LEGACY, + RTE_INTR_MODE_MSI, + RTE_INTR_MODE_MSIX +}; + +#endif /* _RTE_PCI_DEV_DEFS_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h new file mode 100644 index 00000000..67b986a6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_features.h @@ -0,0 +1,69 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_DEV_FEATURES_H +#define _RTE_PCI_DEV_FEATURES_H + +#include + +#define RTE_INTR_MODE_NONE_NAME "none" +#define RTE_INTR_MODE_LEGACY_NAME "legacy" +#define RTE_INTR_MODE_MSI_NAME "msi" +#define RTE_INTR_MODE_MSIX_NAME "msix" + +#endif diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h new file mode 100644 index 00000000..cf7b5487 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h @@ -0,0 +1,704 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/** + * @file + * + * This file contains a list of the PCI device IDs recognised by DPDK, which + * can be used to fill out an array of structures describing the devices. + * + * Currently four families of devices are recognised: those supported by the + * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio + * driver which is a para virtualization driver running in guest virtual machine. + * The inclusion of these in an array built using this file depends on the + * definition of + * RTE_PCI_DEV_ID_DECL_EM + * RTE_PCI_DEV_ID_DECL_IGB + * RTE_PCI_DEV_ID_DECL_IGBVF + * RTE_PCI_DEV_ID_DECL_IXGBE + * RTE_PCI_DEV_ID_DECL_IXGBEVF + * RTE_PCI_DEV_ID_DECL_I40E + * RTE_PCI_DEV_ID_DECL_I40EVF + * RTE_PCI_DEV_ID_DECL_VIRTIO + * at the time when this file is included. + * + * In order to populate an array, the user of this file must define this macro: + * RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID). For example: + * + * @code + * struct device { + * int vend; + * int dev; + * }; + * + * struct device devices[] = { + * #define RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID) {vend, dev}, + * #include + * }; + * @endcode + * + * Note that this file can be included multiple times within the same file. + */ + +#ifndef RTE_PCI_DEV_ID_DECL_EM +#define RTE_PCI_DEV_ID_DECL_EM(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IGB +#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IGBVF +#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IXGBE +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_I40E +#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_I40EVF +#define RTE_PCI_DEV_ID_DECL_I40EVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_VMXNET3 +#define RTE_PCI_DEV_ID_DECL_VMXNET3(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_FM10K +#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_FM10KVF +#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_ENIC +#define RTE_PCI_DEV_ID_DECL_ENIC(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_BNX2X +#define RTE_PCI_DEV_ID_DECL_BNX2X(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_BNX2XVF +#define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev) +#endif + +#ifndef PCI_VENDOR_ID_INTEL +/** Vendor ID used by Intel devices */ +#define PCI_VENDOR_ID_INTEL 0x8086 +#endif + +#ifndef PCI_VENDOR_ID_QUMRANET +/** Vendor ID used by virtio devices */ +#define PCI_VENDOR_ID_QUMRANET 0x1AF4 +#endif + +#ifndef PCI_VENDOR_ID_VMWARE +/** Vendor ID used by VMware devices */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#endif + +#ifndef PCI_VENDOR_ID_CISCO +/** Vendor ID used by Cisco VIC devices */ +#define PCI_VENDOR_ID_CISCO 0x1137 +#endif + +#ifndef PCI_VENDOR_ID_BROADCOM +/** Vendor ID used by Broadcom devices */ +#define PCI_VENDOR_ID_BROADCOM 0x14E4 +#endif + +/******************** Physical EM devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82542 0x1000 +#define E1000_DEV_ID_82543GC_FIBER 0x1001 +#define E1000_DEV_ID_82543GC_COPPER 0x1004 +#define E1000_DEV_ID_82544EI_COPPER 0x1008 +#define E1000_DEV_ID_82544EI_FIBER 0x1009 +#define E1000_DEV_ID_82544GC_COPPER 0x100C +#define E1000_DEV_ID_82544GC_LOM 0x100D +#define E1000_DEV_ID_82540EM 0x100E +#define E1000_DEV_ID_82540EM_LOM 0x1015 +#define E1000_DEV_ID_82540EP_LOM 0x1016 +#define E1000_DEV_ID_82540EP 0x1017 +#define E1000_DEV_ID_82540EP_LP 0x101E +#define E1000_DEV_ID_82545EM_COPPER 0x100F +#define E1000_DEV_ID_82545EM_FIBER 0x1011 +#define E1000_DEV_ID_82545GM_COPPER 0x1026 +#define E1000_DEV_ID_82545GM_FIBER 0x1027 +#define E1000_DEV_ID_82545GM_SERDES 0x1028 +#define E1000_DEV_ID_82546EB_COPPER 0x1010 +#define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D +#define E1000_DEV_ID_82546GB_COPPER 0x1079 +#define E1000_DEV_ID_82546GB_FIBER 0x107A +#define E1000_DEV_ID_82546GB_SERDES 0x107B +#define E1000_DEV_ID_82546GB_PCIE 0x108A +#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 +#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 +#define E1000_DEV_ID_82541EI 0x1013 +#define E1000_DEV_ID_82541EI_MOBILE 0x1018 +#define E1000_DEV_ID_82541ER_LOM 0x1014 +#define E1000_DEV_ID_82541ER 0x1078 +#define E1000_DEV_ID_82541GI 0x1076 +#define E1000_DEV_ID_82541GI_LF 0x107C +#define E1000_DEV_ID_82541GI_MOBILE 0x1077 +#define E1000_DEV_ID_82547EI 0x1019 +#define E1000_DEV_ID_82547EI_MOBILE 0x101A +#define E1000_DEV_ID_82547GI 0x1075 +#define E1000_DEV_ID_82571EB_COPPER 0x105E +#define E1000_DEV_ID_82571EB_FIBER 0x105F +#define E1000_DEV_ID_82571EB_SERDES 0x1060 +#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 +#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA +#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 +#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 +#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 +#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC +#define E1000_DEV_ID_82572EI_COPPER 0x107D +#define E1000_DEV_ID_82572EI_FIBER 0x107E +#define E1000_DEV_ID_82572EI_SERDES 0x107F +#define E1000_DEV_ID_82572EI 0x10B9 +#define E1000_DEV_ID_82573E 0x108B +#define E1000_DEV_ID_82573E_IAMT 0x108C +#define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82574L 0x10D3 +#define E1000_DEV_ID_82574LA 0x10F6 +#define E1000_DEV_ID_82583V 0x150C +#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 +#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 +#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA +#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB +#define E1000_DEV_ID_ICH8_82567V_3 0x1501 +#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 +#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A +#define E1000_DEV_ID_ICH8_IGP_C 0x104B +#define E1000_DEV_ID_ICH8_IFE 0x104C +#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 +#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 +#define E1000_DEV_ID_ICH8_IGP_M 0x104D +#define E1000_DEV_ID_ICH9_IGP_M 0x10BF +#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 +#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB +#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD +#define E1000_DEV_ID_ICH9_BM 0x10E5 +#define E1000_DEV_ID_ICH9_IGP_C 0x294C +#define E1000_DEV_ID_ICH9_IFE 0x10C0 +#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 +#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 +#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC +#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD +#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE +#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE +#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF +#define E1000_DEV_ID_ICH10_D_BM_V 0x1525 + +#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA +#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB +#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF +#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 +#define E1000_DEV_ID_PCH2_LV_LM 0x1502 +#define E1000_DEV_ID_PCH2_LV_V 0x1503 +#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A +#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B +#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A +#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 +#define E1000_DEV_ID_PCH_I218_LM2 0x15A0 +#define E1000_DEV_ID_PCH_I218_V2 0x15A1 +#define E1000_DEV_ID_PCH_I218_LM3 0x15A2 +#define E1000_DEV_ID_PCH_I218_V3 0x15A3 + + +/* + * Tested (supported) on VM emulated HW. + */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82540EM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_FIBER) + +/* + * Tested (supported) on real HW. + */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82573L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574LA) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82583V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_LM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM2) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V2) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM3) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V3) + + +/******************** Physical IGB devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_I350_DA4 0x1546 +#define E1000_DEV_ID_I210_COPPER 0x1533 +#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 +#define E1000_DEV_ID_I210_COPPER_IT 0x1535 +#define E1000_DEV_ID_I210_FIBER 0x1536 +#define E1000_DEV_ID_I210_SERDES 0x1537 +#define E1000_DEV_ID_I210_SGMII 0x1538 +#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B +#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C +#define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) + +/****************** Physical IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82598 0x10B6 +#define IXGBE_DEV_ID_82598_BX 0x1508 +#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 +#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 +#define IXGBE_DEV_ID_82598AT 0x10C8 +#define IXGBE_DEV_ID_82598AT2 0x150B +#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB +#define IXGBE_DEV_ID_82598EB_CX4 0x10DD +#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC +#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 +#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 +#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 +#define IXGBE_DEV_ID_82599_KX4 0x10F7 +#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 +#define IXGBE_DEV_ID_82599_KR 0x1517 +#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C +#define IXGBE_DEV_ID_82599_CX4 0x10F9 +#define IXGBE_DEV_ID_82599_SFP 0x10FB +#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 +#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 +#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A +#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 +#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 +#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D +#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A +#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 +#define IXGBE_DEV_ID_82599EN_SFP 0x1557 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C +#define IXGBE_DEV_ID_82599_LS 0x154F +#define IXGBE_DEV_ID_X540T 0x1528 +#define IXGBE_DEV_ID_X540T1 0x1560 +#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC +#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD +#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE +#define IXGBE_DEV_ID_X550T 0x1563 +#define IXGBE_DEV_ID_X550T1 0x15D1 +#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 +#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 +#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 +#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15C6 +#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15C7 +#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 +#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA +#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC +#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE +#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA +#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB + +#ifdef RTE_NIC_BYPASS +#define IXGBE_DEV_ID_82599_BYPASS 0x155D +#endif + +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598AF_SINGLE_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82599_COMBO_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) + +#ifdef RTE_NIC_BYPASS +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) +#endif + +/*************** Physical I40E devices from i40e_type.h *****************/ + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QEMU 0x1574 +#define I40E_DEV_ID_KX_B 0x1580 +#define I40E_DEV_ID_KX_C 0x1581 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_20G_KR2 0x1587 +#define I40E_DEV_ID_20G_KR2_A 0x1588 +#define I40E_DEV_ID_10G_BASE_T4 0x1589 +#define I40E_DEV_ID_X722_A0 0x374C +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF +#define I40E_DEV_ID_SFP_X722 0x37D0 +#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 +#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 + +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722) + +/*************** Physical FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_PF 0x15A4 +#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0 + +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF) +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2) + +/****************** Virtual IGB devices from e1000_hw.h ******************/ + +#define E1000_DEV_ID_82576_VF 0x10CA +#define E1000_DEV_ID_82576_VF_HV 0x152D +#define E1000_DEV_ID_I350_VF 0x1520 +#define E1000_DEV_ID_I350_VF_HV 0x152F + +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) + +/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82599_VF 0x10ED +#define IXGBE_DEV_ID_82599_VF_HV 0x152E +#define IXGBE_DEV_ID_X540_VF 0x1515 +#define IXGBE_DEV_ID_X540_VF_HV 0x1530 +#define IXGBE_DEV_ID_X550_VF_HV 0x1564 +#define IXGBE_DEV_ID_X550_VF 0x1565 +#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 +#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 +#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 +#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 + +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) + +/****************** Virtual I40E devices from i40e_type.h ********************/ + +#define I40E_DEV_ID_VF 0x154C +#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_X722_A0_VF 0x374D +#define I40E_DEV_ID_X722_VF 0x37CD +#define I40E_DEV_ID_X722_VF_HV 0x37D9 + +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF_HV) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF_HV) + +/****************** Virtio devices from virtio.h ******************/ + +#define QUMRANET_DEV_ID_VIRTIO 0x1000 + +RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO) + +/****************** VMware VMXNET3 devices ******************/ + +#define VMWARE_DEV_ID_VMXNET3 0x07B0 + +RTE_PCI_DEV_ID_DECL_VMXNET3(PCI_VENDOR_ID_VMWARE, VMWARE_DEV_ID_VMXNET3) + +/*************** Virtual FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_VF 0x15A5 + +RTE_PCI_DEV_ID_DECL_FM10KVF(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_VF) + +/****************** Cisco VIC devices ******************/ + +#define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ + +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) + +/****************** QLogic devices ******************/ + +/* Broadcom/QLogic BNX2X */ +#define BNX2X_DEV_ID_57710 0x164e +#define BNX2X_DEV_ID_57711 0x164f +#define BNX2X_DEV_ID_57711E 0x1650 +#define BNX2X_DEV_ID_57712 0x1662 +#define BNX2X_DEV_ID_57712_MF 0x1663 +#define BNX2X_DEV_ID_57712_VF 0x166f +#define BNX2X_DEV_ID_57713 0x1651 +#define BNX2X_DEV_ID_57713E 0x1652 +#define BNX2X_DEV_ID_57800 0x168a +#define BNX2X_DEV_ID_57800_MF 0x16a5 +#define BNX2X_DEV_ID_57800_VF 0x16a9 +#define BNX2X_DEV_ID_57810 0x168e +#define BNX2X_DEV_ID_57810_MF 0x16ae +#define BNX2X_DEV_ID_57810_VF 0x16af +#define BNX2X_DEV_ID_57811 0x163d +#define BNX2X_DEV_ID_57811_MF 0x163e +#define BNX2X_DEV_ID_57811_VF 0x163f + +#define BNX2X_DEV_ID_57840_OBS 0x168d +#define BNX2X_DEV_ID_57840_OBS_MF 0x16ab +#define BNX2X_DEV_ID_57840_4_10 0x16a1 +#define BNX2X_DEV_ID_57840_2_20 0x16a2 +#define BNX2X_DEV_ID_57840_MF 0x16a4 +#define BNX2X_DEV_ID_57840_VF 0x16ad + +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57711) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_OBS) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_4_10) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_2_20) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_VF) +#ifdef RTE_LIBRTE_BNX2X_MF_SUPPORT +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF) +#endif + +/* + * Undef all RTE_PCI_DEV_ID_DECL_* here. + */ +#undef RTE_PCI_DEV_ID_DECL_BNX2X +#undef RTE_PCI_DEV_ID_DECL_BNX2XVF +#undef RTE_PCI_DEV_ID_DECL_EM +#undef RTE_PCI_DEV_ID_DECL_IGB +#undef RTE_PCI_DEV_ID_DECL_IGBVF +#undef RTE_PCI_DEV_ID_DECL_IXGBE +#undef RTE_PCI_DEV_ID_DECL_IXGBEVF +#undef RTE_PCI_DEV_ID_DECL_I40E +#undef RTE_PCI_DEV_ID_DECL_I40EVF +#undef RTE_PCI_DEV_ID_DECL_VIRTIO +#undef RTE_PCI_DEV_ID_DECL_VMXNET3 +#undef RTE_PCI_DEV_ID_DECL_FM10K +#undef RTE_PCI_DEV_ID_DECL_FM10KVF diff --git a/lib/librte_eal/common/include/rte_per_lcore.h b/lib/librte_eal/common/include/rte_per_lcore.h new file mode 100644 index 00000000..5434729a --- /dev/null +++ b/lib/librte_eal/common/include/rte_per_lcore.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PER_LCORE_H_ +#define _RTE_PER_LCORE_H_ + +/** + * @file + * + * Per-lcore variables in RTE + * + * This file defines an API for instantiating per-lcore "global + * variables" that are environment-specific. Note that in all + * environments, a "shared variable" is the default when you use a + * global variable. + * + * Parts of this are execution environment specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Macro to define a per lcore variable "var" of type "type", don't + * use keywords like "static" or "volatile" in type, just prefix the + * whole macro. + */ +#define RTE_DEFINE_PER_LCORE(type, name) \ + __thread __typeof__(type) per_lcore_##name + +/** + * Macro to declare an extern per lcore variable "var" of type "type" + */ +#define RTE_DECLARE_PER_LCORE(type, name) \ + extern __thread __typeof__(type) per_lcore_##name + +/** + * Read/write the per-lcore variable value + */ +#define RTE_PER_LCORE(name) (per_lcore_##name) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PER_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h new file mode 100644 index 00000000..24ae8363 --- /dev/null +++ b/lib/librte_eal/common/include/rte_random.h @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RANDOM_H_ +#define _RTE_RANDOM_H_ + +/** + * @file + * + * Pseudo-random Generators in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * Seed the pseudo-random generator. + * + * The generator is automatically seeded by the EAL init with a timer + * value. It may need to be re-seeded by the user with a real random + * value. + * + * @param seedval + * The value of the seed. + */ +static inline void +rte_srand(uint64_t seedval) +{ + srand48((long unsigned int)seedval); +} + +/** + * Get a pseudo-random value. + * + * This function generates pseudo-random numbers using the linear + * congruential algorithm and 48-bit integer arithmetic, called twice + * to generate a 64-bit value. + * + * @return + * A pseudo-random value between 0 and (1<<64)-1. + */ +static inline uint64_t +rte_rand(void) +{ + uint64_t val; + val = lrand48(); + val <<= 32; + val += lrand48(); + return val; +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_PER_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h new file mode 100644 index 00000000..cfca2f8d --- /dev/null +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -0,0 +1,81 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * String-related functions as replacement for libc equivalents + */ + +#ifndef _RTE_STRING_FNS_H_ +#define _RTE_STRING_FNS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Takes string "string" parameter and splits it at character "delim" + * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like + * strtok or strsep functions, this modifies its input string, by replacing + * instances of "delim" with '\\0'. All resultant tokens are returned in the + * "tokens" array which must have enough entries to hold "maxtokens". + * + * @param string + * The input string to be split into tokens + * + * @param stringlen + * The max length of the input buffer + * + * @param tokens + * The array to hold the pointers to the tokens in the string + * + * @param maxtokens + * The number of elements in the tokens array. At most, maxtokens-1 splits + * of the string will be done. + * + * @param delim + * The character on which the split of the data will be done + * + * @return + * The number of tokens in the tokens array. + */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_STRING_FNS_H */ diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h new file mode 100644 index 00000000..4a686e68 --- /dev/null +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_TAILQ_H_ +#define _RTE_TAILQ_H_ + +/** + * @file + * Here defines rte_tailq APIs for only internal use + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** dummy structure type used by the rte_tailq APIs */ +struct rte_tailq_entry { + TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */ + void *data; /**< Pointer to the data referenced by this tailq entry */ +}; +/** dummy */ +TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry); + +#define RTE_TAILQ_NAMESIZE 32 + +/** + * The structure defining a tailq header entry for storing + * in the rte_config structure in shared memory. Each tailq + * is identified by name. + * Any library storing a set of objects e.g. rings, mempools, hash-tables, + * is recommended to use an entry here, so as to make it easy for + * a multi-process app to find already-created elements in shared memory. + */ +struct rte_tailq_head { + struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */ + char name[RTE_TAILQ_NAMESIZE]; +}; + +struct rte_tailq_elem { + /** + * Reference to head in shared mem, updated at init time by + * rte_eal_tailqs_init() + */ + struct rte_tailq_head *head; + TAILQ_ENTRY(rte_tailq_elem) next; + const char name[RTE_TAILQ_NAMESIZE]; +}; + +/** + * Return the first tailq entry casted to the right struct. + */ +#define RTE_TAILQ_CAST(tailq_entry, struct_name) \ + (struct struct_name *)&(tailq_entry)->tailq_head + +/** + * Utility macro to make looking up a tailqueue for a particular struct easier. + * + * @param name + * The name of tailq + * + * @param struct_name + * The name of the list type we are using. (Generally this is the same as the + * first parameter passed to TAILQ_HEAD macro) + * + * @return + * The return value from rte_eal_tailq_lookup, typecast to the appropriate + * structure pointer type. + * NULL on error, since the tailq_head is the first + * element in the rte_tailq_head structure. + */ +#define RTE_TAILQ_LOOKUP(name, struct_name) \ + RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) + +/** + * Dump tail queues to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_tailq(FILE *f); + +/** + * Lookup for a tail queue. + * + * Get a pointer to a tail queue header of a tail + * queue identified by the name given as an argument. + * Note: this function is not multi-thread safe, and should only be called from + * a single thread at a time + * + * @param name + * The name of the queue. + * @return + * A pointer to the tail queue head structure. + */ +struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); + +/** + * Register a tail queue. + * + * Register a tail queue from shared memory. + * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to + * register tailq from the different dpdk libraries. Since this macro is a + * constructor, the function has no access to dpdk shared memory, so the + * registered tailq can not be used before call to rte_eal_init() which calls + * rte_eal_tailqs_init(). + * + * @param t + * The tailq element which contains the name of the tailq you want to + * create (/retrieve when in secondary process). + * @return + * 0 on success or -1 in case of an error. + */ +int rte_eal_tailq_register(struct rte_tailq_elem *t); + +#define EAL_REGISTER_TAILQ(t) \ +void tailqinitfn_ ##t(void); \ +void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +{ \ + if (rte_eal_tailq_register(&t) < 0) \ + rte_panic("Cannot initialize tailq: %s\n", t.name); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_TAILQ_H_ */ diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h new file mode 100644 index 00000000..4b13b9c1 --- /dev/null +++ b/lib/librte_eal/common/include/rte_time.h @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define NSEC_PER_SEC 1000000000L + +/** + * Structure to hold the parameters of a running cycle counter to assist + * in converting cycles to nanoseconds. + */ +struct rte_timecounter { + /** Last cycle counter value read. */ + uint64_t cycle_last; + /** Nanoseconds count. */ + uint64_t nsec; + /** Bitmask separating nanosecond and sub-nanoseconds. */ + uint64_t nsec_mask; + /** Sub-nanoseconds count. */ + uint64_t nsec_frac; + /** Bitmask for two's complement substraction of non-64 bit counters. */ + uint64_t cc_mask; + /** Cycle to nanosecond divisor (power of two). */ + uint32_t cc_shift; +}; + +/** + * Converts cyclecounter cycles to nanoseconds. + */ +static inline uint64_t +rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles) +{ + uint64_t ns; + + /* Add fractional nanoseconds. */ + ns = cycles + tc->nsec_frac; + tc->nsec_frac = ns & tc->nsec_mask; + + /* Shift to get only nanoseconds. */ + return ns >> tc->cc_shift; +} + +/** + * Update the internal nanosecond count in the structure. + */ +static inline uint64_t +rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now) +{ + uint64_t cycle_delta, ns_offset; + + /* Calculate the delta since the last call. */ + if (tc->cycle_last <= cycle_now) + cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask; + else + /* Handle cycle counts that have wrapped around . */ + cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1; + + /* Convert to nanoseconds. */ + ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta); + + /* Store current cycle counter for next call. */ + tc->cycle_last = cycle_now; + + /* Update the nanosecond count. */ + tc->nsec += ns_offset; + + return tc->nsec; +} + +/** + * Convert from timespec structure into nanosecond units. + */ +static inline uint64_t +rte_timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * Convert from nanosecond units into timespec structure. + */ +static inline struct timespec +rte_ns_to_timespec(uint64_t nsec) +{ + struct timespec ts = {0, 0}; + + if (nsec == 0) + return ts; + + ts.tv_sec = nsec / NSEC_PER_SEC; + ts.tv_nsec = nsec % NSEC_PER_SEC; + + return ts; +} diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h new file mode 100644 index 00000000..f8ea6d73 --- /dev/null +++ b/lib/librte_eal/common/include/rte_version.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of DPDK version numbers + */ + +#ifndef _RTE_VERSION_H_ +#define _RTE_VERSION_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * String that appears before the version number + */ +#define RTE_VER_PREFIX "DPDK" + +/** + * Major version/year number i.e. the yy in yy.mm.z + */ +#define RTE_VER_YEAR 16 + +/** + * Minor version/month number i.e. the mm in yy.mm.z + */ +#define RTE_VER_MONTH 4 + +/** + * Patch level number i.e. the z in yy.mm.z + */ +#define RTE_VER_MINOR 0 + +/** + * Extra string to be appended to version number + */ +#define RTE_VER_SUFFIX "" + +/** + * Patch release number + * 0-15 = release candidates + * 16 = release + */ +#define RTE_VER_RELEASE 16 + +/** + * Macro to compute a version number usable for comparisons + */ +#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d)) + +/** + * All version numbers in one to compare with RTE_VERSION_NUM() + */ +#define RTE_VERSION RTE_VERSION_NUM( \ + RTE_VER_YEAR, \ + RTE_VER_MONTH, \ + RTE_VER_MINOR, \ + RTE_VER_RELEASE) + +/** + * Function returning version string + * @return + * string + */ +static inline const char * +rte_version(void) +{ + static char version[32]; + if (version[0] != 0) + return version; + if (strlen(RTE_VER_SUFFIX) == 0) + snprintf(version, sizeof(version), "%s %d.%02d.%d", + RTE_VER_PREFIX, + RTE_VER_YEAR, + RTE_VER_MONTH, + RTE_VER_MINOR); + else + snprintf(version, sizeof(version), "%s %d.%02d.%d%s%d", + RTE_VER_PREFIX, + RTE_VER_YEAR, + RTE_VER_MONTH, + RTE_VER_MINOR, + RTE_VER_SUFFIX, + RTE_VER_RELEASE < 16 ? + RTE_VER_RELEASE : + RTE_VER_RELEASE - 16); + return version; +} + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_VERSION_H */ diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_eal/common/include/rte_warnings.h new file mode 100644 index 00000000..54b545c9 --- /dev/null +++ b/lib/librte_eal/common/include/rte_warnings.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of warnings for use of various insecure functions + */ + +#ifndef _RTE_WARNINGS_H_ +#define _RTE_WARNINGS_H_ + +#ifdef RTE_INSECURE_FUNCTION_WARNING + +/* we need to include all used standard header files so that they appear + * _before_ we poison the function names. + */ + +#include +#include +#include +#include +#include +#ifdef RTE_EXEC_ENV_LINUXAPP +#include +#endif + +/* the following function are deemed not fully secure for use e.g. they + * do not always null-terminate arguments */ +#pragma GCC poison sprintf strtok snprintf vsnprintf +#pragma GCC poison strlen strcpy strcat +#pragma GCC poison sscanf + +/* other unsafe functions may be implemented as macros so just undef them */ +#ifdef strsep +#undef strsep +#else +#pragma GCC poison strsep +#endif + +#ifdef strncpy +#undef strncpy +#else +#pragma GCC poison strncpy +#endif + +#ifdef strncat +#undef strncat +#else +#pragma GCC poison strncat +#endif + +#endif + +#endif /* RTE_WARNINGS_H */ diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c new file mode 100644 index 00000000..27e9925d --- /dev/null +++ b/lib/librte_eal/common/malloc_elem.c @@ -0,0 +1,344 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) + +/* + * initialise a general malloc_elem header structure + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, const struct rte_memseg *ms, size_t size) +{ + elem->heap = heap; + elem->ms = ms; + elem->prev = NULL; + memset(&elem->free_list, 0, sizeof(elem->free_list)); + elem->state = ELEM_FREE; + elem->size = size; + elem->pad = 0; + set_header(elem); + set_trailer(elem); +} + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker + */ +void +malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) +{ + malloc_elem_init(elem, prev->heap, prev->ms, 0); + elem->prev = prev; + elem->state = ELEM_BUSY; /* mark busy so its never merged */ +} + +/* + * calculate the starting point of where data of the requested size + * and alignment would fit in the current element. If the data doesn't + * fit, return NULL. + */ +static void * +elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem->size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + + /* if the new start point is before the exist start, it won't fit */ + return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; +} + +/* + * use elem_start_pt to determine if we get meet the size and + * alignment request from the current element + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + return elem_start_pt(elem, size, align, bound) != NULL; +} + +/* + * split an existing element into two smaller elements at the given + * split_pt parameter. + */ +static void +split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) +{ + struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size); + const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; + const size_t new_elem_size = elem->size - old_elem_size; + + malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size); + split_pt->prev = elem; + next_elem->prev = split_pt; + elem->size = old_elem_size; + set_trailer(elem); +} + +/* + * Given an element size, compute its freelist index. + * We free an element into the freelist containing similarly-sized elements. + * We try to allocate elements starting with the freelist containing + * similarly-sized elements, and if necessary, we search freelists + * containing larger elements. + * + * Example element size ranges for a heap with five free lists: + * heap->free_head[0] - (0 , 2^8] + * heap->free_head[1] - (2^8 , 2^10] + * heap->free_head[2] - (2^10 ,2^12] + * heap->free_head[3] - (2^12, 2^14] + * heap->free_head[4] - (2^14, MAX_SIZE] + */ +size_t +malloc_elem_free_list_index(size_t size) +{ +#define MALLOC_MINSIZE_LOG2 8 +#define MALLOC_LOG2_INCREMENT 2 + + size_t log2; + size_t index; + + if (size <= (1UL << MALLOC_MINSIZE_LOG2)) + return 0; + + /* Find next power of 2 >= size. */ + log2 = sizeof(size) * 8 - __builtin_clzl(size-1); + + /* Compute freelist index, based on log2(size). */ + index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) / + MALLOC_LOG2_INCREMENT; + + return index <= RTE_HEAP_NUM_FREELISTS-1? + index: RTE_HEAP_NUM_FREELISTS-1; +} + +/* + * Add the specified element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem) +{ + size_t idx; + + idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); + elem->state = ELEM_FREE; + LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list); +} + +/* + * Remove the specified element from its heap's free list. + */ +static void +elem_free_list_remove(struct malloc_elem *elem) +{ + LIST_REMOVE(elem, free_list); +} + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + * This function is only called from malloc_heap_alloc so parameter checking + * is not done here, as it's done there previously. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; + const size_t trailer_size = elem->size - old_elem_size - size - + MALLOC_ELEM_OVERHEAD; + + elem_free_list_remove(elem); + + if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split it, too much free space after elem */ + struct malloc_elem *new_free_elem = + RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD); + + split_elem(elem, new_free_elem); + malloc_elem_free_list_insert(new_free_elem); + } + + if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* don't split it, pad the element instead */ + elem->state = ELEM_BUSY; + elem->pad = old_elem_size; + + /* put a dummy header in padding, to point to real element header */ + if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything + * is cache-line aligned */ + new_elem->pad = elem->pad; + new_elem->state = ELEM_PAD; + new_elem->size = elem->size - elem->pad; + set_header(new_elem); + } + + return new_elem; + } + + /* we are going to split the element in two. The original element + * remains free, and the new element is the one allocated. + * Re-insert original element, in case its new size makes it + * belong on a different list. + */ + split_elem(elem, new_elem); + new_elem->state = ELEM_BUSY; + malloc_elem_free_list_insert(elem); + + return new_elem; +} + +/* + * joing two struct malloc_elem together. elem1 and elem2 must + * be contiguous in memory. + */ +static inline void +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) +{ + struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size); + elem1->size += elem2->size; + next->prev = elem1; +} + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +int +malloc_elem_free(struct malloc_elem *elem) +{ + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + rte_spinlock_lock(&(elem->heap->lock)); + struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); + if (next->state == ELEM_FREE){ + /* remove from free list, join to this one */ + elem_free_list_remove(next); + join_elem(elem, next); + } + + /* check if previous element is free, if so join with it and return, + * need to re-insert in free list, as that element's size is changing + */ + if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { + elem_free_list_remove(elem->prev); + join_elem(elem->prev, elem); + malloc_elem_free_list_insert(elem->prev); + } + /* otherwise add ourselves to the free list */ + else { + malloc_elem_free_list_insert(elem); + elem->pad = 0; + } + /* decrease heap's count of allocated elements */ + elem->heap->alloc_count--; + rte_spinlock_unlock(&(elem->heap->lock)); + + return 0; +} + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size) +{ + const size_t new_size = size + MALLOC_ELEM_OVERHEAD; + /* if we request a smaller size, then always return ok */ + const size_t current_size = elem->size - elem->pad; + if (current_size >= new_size) + return 0; + + struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); + rte_spinlock_lock(&elem->heap->lock); + if (next ->state != ELEM_FREE) + goto err_return; + if (current_size + next->size < new_size) + goto err_return; + + /* we now know the element fits, so remove from free list, + * join the two + */ + elem_free_list_remove(next); + join_elem(elem, next); + + if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){ + /* now we have a big block together. Lets cut it down a bit, by splitting */ + struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size); + split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE); + split_elem(elem, split_pt); + malloc_elem_free_list_insert(split_pt); + } + rte_spinlock_unlock(&elem->heap->lock); + return 0; + +err_return: + rte_spinlock_unlock(&elem->heap->lock); + return -1; +} diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h new file mode 100644 index 00000000..f04b2d1e --- /dev/null +++ b/lib/librte_eal/common/malloc_elem.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MALLOC_ELEM_H_ +#define MALLOC_ELEM_H_ + +#include + +/* dummy definition of struct so we can use pointers to it in malloc_elem struct */ +struct malloc_heap; + +enum elem_state { + ELEM_FREE = 0, + ELEM_BUSY, + ELEM_PAD /* element is a padding-only header */ +}; + +struct malloc_elem { + struct malloc_heap *heap; + struct malloc_elem *volatile prev; /* points to prev elem in memseg */ + LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ + const struct rte_memseg *ms; + volatile enum elem_state state; + uint32_t pad; + size_t size; +#ifdef RTE_LIBRTE_MALLOC_DEBUG + uint64_t header_cookie; /* Cookie marking start of data */ + /* trailer cookie at start + size */ +#endif +} __rte_cache_aligned; + +#ifndef RTE_LIBRTE_MALLOC_DEBUG +static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; + +/* dummy function - just check if pointer is non-null */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; } + +/* dummy function - no header if malloc_debug is not enabled */ +static inline void +set_header(struct malloc_elem *elem __rte_unused){ } + +/* dummy function - no trailer if malloc_debug is not enabled */ +static inline void +set_trailer(struct malloc_elem *elem __rte_unused){ } + + +#else +static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE; + +#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. */ +#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +/* define macros to make referencing the header and trailer cookies easier */ +#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \ + elem->size - MALLOC_ELEM_TRAILER_LEN))) +#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie) + +static inline void +set_header(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE; +} + +static inline void +set_trailer(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE; +} + +/* check that the header and trailer cookies are set correctly */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem) +{ + return elem != NULL && + MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE && + MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE; +} + +#endif + +static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem); +#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN) + +/* + * Given a pointer to the start of a memory block returned by malloc, get + * the actual malloc_elem header for that block. + */ +static inline struct malloc_elem * +malloc_elem_from_data(const void *data) +{ + if (data == NULL) + return NULL; + + struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN); + if (!malloc_elem_cookies_ok(elem)) + return NULL; + return elem->state != ELEM_PAD ? elem: RTE_PTR_SUB(elem, elem->pad); +} + +/* + * initialise a malloc_elem header + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, + const struct rte_memseg *ms, + size_t size); + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker + */ +void +malloc_elem_mkend(struct malloc_elem *elem, + struct malloc_elem *prev_free); + +/* + * return true if the current malloc_elem can hold a block of data + * of the requested size and with the requested alignment + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +int +malloc_elem_free(struct malloc_elem *elem); + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size); + +/* + * Given an element size, compute its freelist index. + */ +size_t +malloc_elem_free_list_index(size_t size); + +/* + * Add element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem); + +#endif /* MALLOC_ELEM_H_ */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c new file mode 100644 index 00000000..763fa324 --- /dev/null +++ b/lib/librte_eal/common/malloc_heap.c @@ -0,0 +1,236 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +static unsigned +check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) +{ + unsigned check_flag = 0; + + if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) + return 1; + + switch (hugepage_sz) { + case RTE_PGSIZE_256K: + check_flag = RTE_MEMZONE_256KB; + break; + case RTE_PGSIZE_2M: + check_flag = RTE_MEMZONE_2MB; + break; + case RTE_PGSIZE_16M: + check_flag = RTE_MEMZONE_16MB; + break; + case RTE_PGSIZE_256M: + check_flag = RTE_MEMZONE_256MB; + break; + case RTE_PGSIZE_512M: + check_flag = RTE_MEMZONE_512MB; + break; + case RTE_PGSIZE_1G: + check_flag = RTE_MEMZONE_1GB; + break; + case RTE_PGSIZE_4G: + check_flag = RTE_MEMZONE_4GB; + break; + case RTE_PGSIZE_16G: + check_flag = RTE_MEMZONE_16GB; + } + + return check_flag & flags; +} + +/* + * Expand the heap with a memseg. + * This reserves the zone and sets a dummy malloc_elem header at the end + * to prevent overflow. The rest of the zone is added to free list as a single + * large free block + */ +static void +malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) +{ + /* allocate the memory block headers, one at end, one at start */ + struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr; + struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr, + ms->len - MALLOC_ELEM_OVERHEAD); + end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE); + const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; + + malloc_elem_init(start_elem, heap, ms, elem_size); + malloc_elem_mkend(end_elem, start_elem); + malloc_elem_free_list_insert(start_elem); + + heap->total_size += elem_size; +} + +/* + * Iterates through the freelist for a heap to find a free element + * which can store data of the required size and with the requested alignment. + * If size is 0, find the biggest available elem. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_suitable_element(struct malloc_heap *heap, size_t size, + unsigned flags, size_t align, size_t bound) +{ + size_t idx; + struct malloc_elem *elem, *alt_elem = NULL; + + for (idx = malloc_elem_free_list_index(size); + idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + if (malloc_elem_can_hold(elem, size, align, bound)) { + if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) + return elem; + if (alt_elem == NULL) + alt_elem = elem; + } + } + } + + if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) + return alt_elem; + + return NULL; +} + +/* + * Main function to allocate a block of memory from the heap. + * It locks the free list, scans it, and adds a new memseg if the + * scan fails. Once the new memseg is added, it re-scans and should return + * the new element after releasing the lock. + */ +void * +malloc_heap_alloc(struct malloc_heap *heap, + const char *type __attribute__((unused)), size_t size, unsigned flags, + size_t align, size_t bound) +{ + struct malloc_elem *elem; + + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + + rte_spinlock_lock(&heap->lock); + + elem = find_suitable_element(heap, size, flags, align, bound); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound); + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + rte_spinlock_unlock(&heap->lock); + + return elem == NULL ? NULL : (void *)(&elem[1]); +} + +/* + * Function to retrieve data for heap on given socket + */ +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats) +{ + size_t idx; + struct malloc_elem *elem; + + /* Initialise variables for heap */ + socket_stats->free_count = 0; + socket_stats->heap_freesz_bytes = 0; + socket_stats->greatest_free_size = 0; + + /* Iterate through free list */ + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + socket_stats->free_count++; + socket_stats->heap_freesz_bytes += elem->size; + if (elem->size > socket_stats->greatest_free_size) + socket_stats->greatest_free_size = elem->size; + } + } + /* Get stats on overall heap and allocated memory on this heap */ + socket_stats->heap_totalsz_bytes = heap->total_size; + socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - + socket_stats->heap_freesz_bytes); + socket_stats->alloc_count = heap->alloc_count; + return 0; +} + +int +rte_eal_malloc_heap_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned ms_cnt; + struct rte_memseg *ms; + + if (mcfg == NULL) + return -1; + + for (ms = &mcfg->memseg[0], ms_cnt = 0; + (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); + ms_cnt++, ms++) { +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * it is not memory to allocate from. + */ + if (ms->ioremap_addr != 0) + continue; +#endif + malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); + } + + return 0; +} diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h new file mode 100644 index 00000000..3ccbef0f --- /dev/null +++ b/lib/librte_eal/common/malloc_heap.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MALLOC_HEAP_H_ +#define MALLOC_HEAP_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline unsigned +malloc_get_numa_socket(void) +{ + unsigned socket_id = rte_socket_id(); + + if (socket_id == (unsigned)SOCKET_ID_ANY) + return 0; + + return socket_id; +} + +void * +malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, + unsigned flags, size_t align, size_t bound); + +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats); + +int +rte_eal_malloc_heap_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c new file mode 100644 index 00000000..23363ec1 --- /dev/null +++ b/lib/librte_eal/common/rte_keepalive.c @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +struct rte_keepalive { + /** Core Liveness. */ + enum rte_keepalive_state { + ALIVE = 1, + MISSING = 0, + DEAD = 2, + GONE = 3 + } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES]; + + /** Last-seen-alive timestamps */ + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; + + /** + * Cores to check. + * Indexed by core id, non-zero if the core should be checked. + */ + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES]; + + /** Dead core handler. */ + rte_keepalive_failure_callback_t callback; + + /** + * Dead core handler app data. + * Pointer is passed to dead core handler. + */ + void *callback_data; + uint64_t tsc_initial; + uint64_t tsc_mhz; +}; + +static void +print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core) +{ + RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n", + msg, + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000) + / rte_get_tsc_hz() + ); +} + +void +rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, + void *ptr_data) +{ + struct rte_keepalive *keepcfg = ptr_data; + int idx_core; + + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) { + if (keepcfg->active_cores[idx_core] == 0) + continue; + + switch (keepcfg->state_flags[idx_core]) { + case ALIVE: /* Alive */ + keepcfg->state_flags[idx_core] = MISSING; + keepcfg->last_alive[idx_core] = rte_rdtsc(); + break; + case MISSING: /* MIA */ + print_trace("Core MIA. ", keepcfg, idx_core); + keepcfg->state_flags[idx_core] = DEAD; + break; + case DEAD: /* Dead */ + keepcfg->state_flags[idx_core] = GONE; + print_trace("Core died. ", keepcfg, idx_core); + if (keepcfg->callback) + keepcfg->callback( + keepcfg->callback_data, + idx_core + ); + break; + case GONE: /* Buried */ + break; + } + } +} + +struct rte_keepalive * +rte_keepalive_create(rte_keepalive_failure_callback_t callback, + void *data) +{ + struct rte_keepalive *keepcfg; + + keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE", + sizeof(struct rte_keepalive), + RTE_CACHE_LINE_SIZE); + if (keepcfg != NULL) { + keepcfg->callback = callback; + keepcfg->callback_data = data; + keepcfg->tsc_initial = rte_rdtsc(); + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000; + } + return keepcfg; +} + +void +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) +{ + if (id_core < RTE_KEEPALIVE_MAXCORES) { + keepcfg->active_cores[id_core] = 1; + keepcfg->last_alive[id_core] = rte_rdtsc(); + } +} + +void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) +{ + keepcfg->state_flags[rte_lcore_id()] = ALIVE; +} diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c new file mode 100644 index 00000000..47deb007 --- /dev/null +++ b/lib/librte_eal/common/rte_malloc.c @@ -0,0 +1,262 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "malloc_elem.h" +#include "malloc_heap.h" + + +/* Free the memory space back to heap */ +void rte_free(void *addr) +{ + if (addr == NULL) return; + if (malloc_elem_free(malloc_elem_from_data(addr)) < 0) + rte_panic("Fatal error: Invalid memory\n"); +} + +/* + * Allocate memory on specified heap. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int socket, i; + void *ret; + + /* return NULL if size is 0 or alignment is not power-of-2 */ + if (size == 0 || (align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, + size, 0, align == 0 ? 1 : align, 0); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + /* we already tried this one */ + if (i == socket) + continue; + + ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, + size, 0, align == 0 ? 1 : align, 0); + if (ret != NULL) + return ret; + } + + return NULL; +} + +/* + * Allocate memory on default heap. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align) +{ + return rte_malloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket) +{ + void *ptr = rte_malloc_socket(type, size, align, socket); + + if (ptr != NULL) + memset(ptr, 0, size); + return ptr; +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align) +{ + return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket) +{ + return rte_zmalloc_socket(type, num * size, align, socket); +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align) +{ + return rte_zmalloc(type, num * size, align); +} + +/* + * Resize allocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align) +{ + if (ptr == NULL) + return rte_malloc(NULL, size, align); + + struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (elem == NULL) + rte_panic("Fatal error: memory corruption detected\n"); + + size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align); + /* check alignment matches first, and if ok, see if we can resize block */ + if (RTE_PTR_ALIGN(ptr,align) == ptr && + malloc_elem_resize(elem, size) == 0) + return ptr; + + /* either alignment is off, or we have no room to expand, + * so move data. */ + void *new_ptr = rte_malloc(NULL, size, align); + if (new_ptr == NULL) + return NULL; + const unsigned old_size = elem->size - MALLOC_ELEM_OVERHEAD; + rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size); + rte_free(ptr); + + return new_ptr; +} + +int +rte_malloc_validate(const void *ptr, size_t *size) +{ + const struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (!malloc_elem_cookies_ok(elem)) + return -1; + if (size != NULL) + *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD; + return 0; +} + +/* + * Function to retrieve data for heap on given socket + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (socket >= RTE_MAX_NUMA_NODES || socket < 0) + return -1; + + return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats); +} + +/* + * Print stats on memory type. If type is NULL, info on all types is printed + */ +void +rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) +{ + unsigned int socket; + struct rte_malloc_socket_stats sock_stats; + /* Iterate through all initialised heaps */ + for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) { + if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0)) + continue; + + fprintf(f, "Socket:%u\n", socket); + fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); + fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes); + fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); + fprintf(f, "\tGreatest_free_size:%zu,\n", + sock_stats.greatest_free_size); + fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); + fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); + } + return; +} + +/* + * TODO: Set limit to memory that can be allocated to memory type + */ +int +rte_malloc_set_limit(__rte_unused const char *type, + __rte_unused size_t max) +{ + return 0; +} + +/* + * Return the physical address of a virtual address obtained through rte_malloc + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + const struct malloc_elem *elem = malloc_elem_from_data(addr); + if (elem == NULL) + return 0; + return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); +} diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile new file mode 100644 index 00000000..20d2a916 --- /dev/null +++ b/lib/librte_eal/linuxapp/Makefile @@ -0,0 +1,39 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal +DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio +DIRS-$(CONFIG_RTE_KNI_KMOD) += kni +DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile new file mode 100644 index 00000000..e1093619 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -0,0 +1,139 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_eal.a + +ARCH_DIR ?= $(RTE_ARCH) +EXPORT_MAP := rte_eal_version.map +VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) + +LIBABIVER := 2 + +VPATH += $(RTE_SDK)/lib/librte_eal/common + +CFLAGS += -I$(SRCDIR)/include +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +CFLAGS += -I$(RTE_SDK)/lib/librte_ring +CFLAGS += -I$(RTE_SDK)/lib/librte_mempool +CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem +CFLAGS += $(WERROR_FLAGS) -O3 + +LDLIBS += -ldl +LDLIBS += -lpthread +LDLIBS += -lgcc_s +LDLIBS += -lrt + +# specific to linuxapp exec-env +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c +endif +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio_mp_sync.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c +ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c +endif + +# from common dir +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c + +# from arch dir +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c + +CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) + +CFLAGS_eal.o := -D_GNU_SOURCE +CFLAGS_eal_interrupts.o := -D_GNU_SOURCE +CFLAGS_eal_pci_vfio_mp_sync.o := -D_GNU_SOURCE +CFLAGS_eal_timer.o := -D_GNU_SOURCE +CFLAGS_eal_lcore.o := -D_GNU_SOURCE +CFLAGS_eal_thread.o := -D_GNU_SOURCE +CFLAGS_eal_log.o := -D_GNU_SOURCE +CFLAGS_eal_common_log.o := -D_GNU_SOURCE +CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE +CFLAGS_eal_pci.o := -D_GNU_SOURCE +CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE +CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE +CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE +CFLAGS_eal_common_options.o := -D_GNU_SOURCE +CFLAGS_eal_common_thread.o := -D_GNU_SOURCE +CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_eal_thread.o += -Wno-return-type +endif + +INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h + +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ + $(addprefix include/exec-env/,$(INC)) + +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c new file mode 100644 index 00000000..8aafd519 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -0,0 +1,936 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2012-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(RTE_ARCH_X86) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" +#include "eal_options.h" + +#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) + +#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) + +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; + +/* early configuration structure, when memory config is not mmapped */ +static struct rte_mem_config early_mem_config; + +/* define fd variable here, because file needs to be kept open for the + * duration of the program, as we hold a write lock on it in the primary proc */ +static int mem_cfg_fd = -1; + +static struct flock wr_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = offsetof(struct rte_mem_config, memseg), + .l_len = sizeof(early_mem_config.memseg), +}; + +/* Address of global and public configuration */ +static struct rte_config rte_config = { + .mem_config = &early_mem_config, +}; + +/* internal configuration (per-core) */ +struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/* internal configuration */ +struct internal_config internal_config; + +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return a pointer to the configuration structure */ +struct rte_config * +rte_eal_get_configuration(void) +{ + return &rte_config; +} + +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + +/* create memory configuration in shared/mmap memory. Take out + * a write lock on the memsegs, so we can auto-detect primary/secondary. + * This means we never close the file while running (auto-close on exit). + * We also don't lock the whole file, so that in future we can use read-locks + * on other parts, e.g. memzones, to detect if there are running secondary + * processes. */ +static void +rte_eal_config_create(void) +{ + void *rte_mem_cfg_addr; + int retval; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + /* map the config before hugepage address so that we don't waste a page */ + if (internal_config.base_virtaddr != 0) + rte_mem_cfg_addr = (void *) + RTE_ALIGN_FLOOR(internal_config.base_virtaddr - + sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + else + rte_mem_cfg_addr = NULL; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + if (retval < 0){ + close(mem_cfg_fd); + rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); + } + + retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); + if (retval < 0){ + close(mem_cfg_fd); + rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary " + "process running?\n", pathname); + } + + rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + + if (rte_mem_cfg_addr == MAP_FAILED){ + rte_panic("Cannot mmap memory for rte_config\n"); + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + + /* store address of the config in the config itself so that secondary + * processes could later map the config into this exact location */ + rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; + +} + +/* attach to an existing shared memory config */ +static void +rte_eal_config_attach(void) +{ + struct rte_mem_config *mem_config; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + /* map it as read-only first */ + mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), + PROT_READ, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* reattach the shared config at exact memory location primary process has it */ +static void +rte_eal_config_reattach(void) +{ + struct rte_mem_config *mem_config; + void *rte_mem_cfg_addr; + + if (internal_config.no_shconf) + return; + + /* save the address primary process has mapped shared config to */ + rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr; + + /* unmap original config */ + munmap(rte_config.mem_config, sizeof(struct rte_mem_config)); + + /* remap the config at proper address */ + mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, + sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, + mem_cfg_fd, 0); + close(mem_cfg_fd); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* Detect if we are a primary or a secondary process */ +enum rte_proc_type_t +eal_proc_type_detect(void) +{ + enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; + const char *pathname = eal_runtime_config_path(); + + /* if we can open the file but not get a write-lock we are a secondary + * process. NOTE: if we get a file handle back, we keep that open + * and don't close it to prevent a race condition between multiple opens */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + + RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", + ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); + + return ptype; +} + +/* Sets up rte_config structure with the pointer to shared memory config.*/ +static void +rte_config_init(void) +{ + rte_config.process_type = internal_config.process_type; + + switch (rte_config.process_type){ + case RTE_PROC_PRIMARY: + rte_eal_config_create(); + break; + case RTE_PROC_SECONDARY: + rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + rte_eal_config_reattach(); + break; + case RTE_PROC_AUTO: + case RTE_PROC_INVALID: + rte_panic("Invalid process type\n"); + } +} + +/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ +static void +eal_hugedirs_unlock(void) +{ + int i; + + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + { + /* skip uninitialized */ + if (internal_config.hugepage_info[i].lock_descriptor < 0) + continue; + /* unlock hugepage file */ + flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); + close(internal_config.hugepage_info[i].lock_descriptor); + /* reset the field */ + internal_config.hugepage_info[i].lock_descriptor = -1; + } +} + +/* display usage */ +static void +eal_usage(const char *prgname) +{ + printf("\nUsage: %s ", prgname); + eal_common_usage(); + printf("EAL Linux options:\n" + " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" + " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" + " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" + " --"OPT_BASE_VIRTADDR" Base virtual address\n" + " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" + " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" + " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" + "\n"); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; +} + +static int +eal_parse_socket_mem(char *socket_mem) +{ + char * arg[RTE_MAX_NUMA_NODES]; + char *end; + int arg_num, i, len; + uint64_t total_mem = 0; + + len = strnlen(socket_mem, SOCKET_MEM_STRLEN); + if (len == SOCKET_MEM_STRLEN) { + RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); + return -1; + } + + /* all other error cases will be caught later */ + if (!isdigit(socket_mem[len-1])) + return -1; + + /* split the optarg into separate socket values */ + arg_num = rte_strsplit(socket_mem, len, + arg, RTE_MAX_NUMA_NODES, ','); + + /* if split failed, or 0 arguments */ + if (arg_num <= 0) + return -1; + + internal_config.force_sockets = 1; + + /* parse each defined socket option */ + errno = 0; + for (i = 0; i < arg_num; i++) { + end = NULL; + internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); + + /* check for invalid input */ + if ((errno != 0) || + (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + internal_config.socket_mem[i] *= 1024ULL; + internal_config.socket_mem[i] *= 1024ULL; + total_mem += internal_config.socket_mem[i]; + } + + /* check if we have a positive amount of total memory */ + if (total_mem == 0) + return -1; + + return 0; +} + +static int +eal_parse_base_virtaddr(const char *arg) +{ + char *end; + uint64_t addr; + + errno = 0; + addr = strtoull(arg, &end, 16); + + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; + + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif + + /* align the addr on 16M boundary, 16MB is the minimum huge page + * size on IBM Power architecture. If the addr is aligned to 16MB, + * it can align to 2MB for x86. So this alignment can also be used + * on x86 */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; +} + +static int +eal_parse_vfio_intr(const char *mode) +{ + unsigned i; + static struct { + const char *name; + enum rte_intr_mode value; + } map[] = { + { "legacy", RTE_INTR_MODE_LEGACY }, + { "msi", RTE_INTR_MODE_MSI }, + { "msix", RTE_INTR_MODE_MSIX }, + }; + + for (i = 0; i < RTE_DIM(map); i++) { + if (!strcmp(mode, map[i].name)) { + internal_config.vfio_intr_mode = map[i].value; + return 0; + } + } + return -1; +} + +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + eal_reset_internal_config(&internal_config); + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? + eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; + } + + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; +} + +/* Parse the argument given in the command line of the application */ +static int +eal_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; + + switch (opt) { + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + + /* long options */ + case OPT_XEN_DOM0_NUM: +#ifdef RTE_LIBRTE_XEN_DOM0 + internal_config.xen_dom0_support = 1; +#else + RTE_LOG(ERR, EAL, "Can't support DPDK app " + "running on Dom0, please configure" + " RTE_LIBRTE_XEN_DOM0=y\n"); + ret = -1; + goto out; +#endif + break; + + case OPT_HUGE_DIR_NUM: + internal_config.hugepage_dir = optarg; + break; + + case OPT_FILE_PREFIX_NUM: + internal_config.hugefile_prefix = optarg; + break; + + case OPT_SOCKET_MEM_NUM: + if (eal_parse_socket_mem(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_BASE_VIRTADDR_NUM: + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_VFIO_INTR_NUM: + if (eal_parse_vfio_intr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_VFIO_INTR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_CREATE_UIO_DEV_NUM: + internal_config.create_uio_dev = 1; + break; + + default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on Linux\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on Linux\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on Linux\n", opt); + } + eal_usage(prgname); + ret = -1; + goto out; + } + } + + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; + } + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + + /* --xen-dom0 doesn't make sense with --socket-mem */ + if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " + "together with --"OPT_XEN_DOM0"\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + + if (optind >= 0) + argv[optind-1] = prgname; + ret = optind-1; + +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; + + return ret; +} + +static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + +/* + * Request iopl privilege for all RPL, returns 0 on success + * iopl() call is mostly for the i386 architecture. For other architectures, + * return -1 to indicate IO privilege can't be changed in this way. + */ +int +rte_eal_iopl_init(void) +{ +#if defined(RTE_ARCH_X86) + if (iopl(3) != 0) + return -1; + return 0; +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) + return 0; /* iopl syscall not supported for ARM/ARM64 */ +#else + return -1; +#endif +} + +/* Launch threads, called at application init(). */ +int +rte_eal_init(int argc, char **argv) +{ + int i, fctret, ret; + pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + const char *logid; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; + + logid = strrchr(argv[0], '/'); + logid = strdup(logid ? logid + 1: argv[0]); + + thread_id = pthread_self(); + + if (rte_eal_log_early_init() < 0) + rte_panic("Cannot init early logs\n"); + + eal_log_level_parse(argc, argv); + + /* set log level as early as possible */ + rte_set_log_level(internal_config.log_level); + + if (rte_eal_cpu_init() < 0) + rte_panic("Cannot detect lcores\n"); + + fctret = eal_parse_args(argc, argv); + if (fctret < 0) + exit(1); + + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + internal_config.xen_dom0_support == 0 && + eal_hugepage_info_init() < 0) + rte_panic("Cannot get hugepage information\n"); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { + if (internal_config.no_hugetlbfs) + internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; + else + internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + + rte_srand(rte_rdtsc()); + + rte_config_init(); + + if (rte_eal_pci_init() < 0) + rte_panic("Cannot init PCI\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_init() < 0) + rte_panic("Cannot init IVSHMEM\n"); +#endif + + if (rte_eal_memory_init() < 0) + rte_panic("Cannot init memory\n"); + + /* the directories are locked during eal_hugepage_info_init */ + eal_hugedirs_unlock(); + + if (rte_eal_memzone_init() < 0) + rte_panic("Cannot init memzone\n"); + + if (rte_eal_tailqs_init() < 0) + rte_panic("Cannot init tail queues for objects\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_obj_init() < 0) + rte_panic("Cannot init IVSHMEM objects\n"); +#endif + + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n"); + + if (rte_eal_alarm_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); + + eal_check_mem_on_local_socket(); + + if (eal_plugins_init() < 0) + rte_panic("Cannot init plugins\n"); + + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + rte_config.master_lcore, (int)thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + + if (rte_eal_intr_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + RTE_LCORE_FOREACH_SLAVE(i) { + + /* + * create communication pipes between master thread + * and children + */ + if (pipe(lcore_config[i].pipe_master2slave) < 0) + rte_panic("Cannot create pipe\n"); + if (pipe(lcore_config[i].pipe_slave2master) < 0) + rte_panic("Cannot create pipe\n"); + + lcore_config[i].state = WAIT; + + /* create a thread for each lcore */ + ret = pthread_create(&lcore_config[i].thread_id, NULL, + eal_thread_loop, NULL); + if (ret != 0) + rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + ret = rte_thread_setname(lcore_config[i].thread_id, + thread_name); + if (ret != 0) + RTE_LOG(ERR, EAL, + "Cannot set name for lcore thread\n"); + } + + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* Probe & Initialize PCI devices */ + if (rte_eal_pci_probe()) + rte_panic("Cannot probe PCI\n"); + + rte_eal_mcfg_complete(); + + return fctret; +} + +/* get core role */ +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned lcore_id) +{ + return rte_config.lcore_role[lcore_id]; +} + +enum rte_proc_type_t +rte_eal_process_type(void) +{ + return rte_config.process_type; +} + +int rte_eal_has_hugepages(void) +{ + return ! internal_config.no_hugetlbfs; +} + +int +rte_eal_check_module(const char *module_name) +{ + char sysfs_mod_name[PATH_MAX]; + struct stat st; + int n; + + if (NULL == module_name) + return -1; + + /* Check if there is sysfs mounted */ + if (stat("/sys/module", &st) != 0) { + RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + /* A module might be built-in, therefore try sysfs */ + n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); + if (n < 0 || n > PATH_MAX) { + RTE_LOG(DEBUG, EAL, "Could not format module path\n"); + return -1; + } + + if (stat(sysfs_mod_name, &st) != 0) { + RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", + sysfs_mod_name, errno, strerror(errno)); + return 0; + } + + /* Module has been found */ + return 1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c new file mode 100644 index 00000000..8b042abc --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -0,0 +1,273 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TFD_NONBLOCK +#include +#define TFD_NONBLOCK O_NONBLOCK +#endif + +#define NS_PER_US 1000 +#define US_PER_MS 1000 +#define MS_PER_S 1000 +#define US_PER_S (US_PER_MS * MS_PER_S) + +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +struct alarm_entry { + LIST_ENTRY(alarm_entry) next; + struct timeval time; + rte_eal_alarm_callback cb_fn; + void *cb_arg; + volatile uint8_t executing; + volatile pthread_t executing_id; +}; + +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); +static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static int handler_registered = 0; +static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); + +int +rte_eal_alarm_init(void) +{ + intr_handle.type = RTE_INTR_HANDLE_ALARM; + /* create a timerfd file descriptor */ + intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); + if (intr_handle.fd == -1) + goto error; + + return 0; + +error: + rte_errno = errno; + return -1; +} + +static void +eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, + void *arg __rte_unused) +{ + struct timespec now; + struct alarm_entry *ap; + + rte_spinlock_lock(&alarm_list_lk); + while ((ap = LIST_FIRST(&alarm_list)) !=NULL && + clock_gettime(CLOCK_TYPE_ID, &now) == 0 && + (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec && + (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) { + ap->executing = 1; + ap->executing_id = pthread_self(); + rte_spinlock_unlock(&alarm_list_lk); + + ap->cb_fn(ap->cb_arg); + + rte_spinlock_lock(&alarm_list_lk); + + LIST_REMOVE(ap, next); + rte_free(ap); + } + + if (!LIST_EMPTY(&alarm_list)) { + struct itimerspec atime = { .it_interval = { 0, 0 } }; + + ap = LIST_FIRST(&alarm_list); + atime.it_value.tv_sec = ap->time.tv_sec; + atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US; + /* perform borrow for subtraction if necessary */ + if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US)) + atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US; + + atime.it_value.tv_sec -= now.tv_sec; + atime.it_value.tv_nsec -= now.tv_nsec; + timerfd_settime(intr_handle.fd, 0, &atime, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); +} + +int +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct timespec now; + int ret = 0; + struct alarm_entry *ap, *new_alarm; + + /* Check parameters, including that us won't cause a uint64_t overflow */ + if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) + return -EINVAL; + + new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0); + if (new_alarm == NULL) + return -ENOMEM; + + /* use current time to calculate absolute time of alarm */ + clock_gettime(CLOCK_TYPE_ID, &now); + + new_alarm->cb_fn = cb_fn; + new_alarm->cb_arg = cb_arg; + new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S; + new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S); + + rte_spinlock_lock(&alarm_list_lk); + if (!handler_registered) { + ret |= rte_intr_callback_register(&intr_handle, + eal_alarm_callback, NULL); + handler_registered = (ret == 0) ? 1 : 0; + } + + if (LIST_EMPTY(&alarm_list)) + LIST_INSERT_HEAD(&alarm_list, new_alarm, next); + else { + LIST_FOREACH(ap, &alarm_list, next) { + if (ap->time.tv_sec > new_alarm->time.tv_sec || + (ap->time.tv_sec == new_alarm->time.tv_sec && + ap->time.tv_usec > new_alarm->time.tv_usec)){ + LIST_INSERT_BEFORE(ap, new_alarm, next); + break; + } + if (LIST_NEXT(ap, next) == NULL) { + LIST_INSERT_AFTER(ap, new_alarm, next); + break; + } + } + } + + if (LIST_FIRST(&alarm_list) == new_alarm) { + struct itimerspec alarm_time = { + .it_interval = {0, 0}, + .it_value = { + .tv_sec = us / US_PER_S, + .tv_nsec = (us % US_PER_S) * NS_PER_US, + }, + }; + ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); + + return ret; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct alarm_entry *ap, *ap_prev; + int count = 0; + int err = 0; + int executing; + + if (!cb_fn) { + rte_errno = EINVAL; + return -1; + } + + do { + executing = 0; + rte_spinlock_lock(&alarm_list_lk); + /* remove any matches at the start of the list */ + while ((ap = LIST_FIRST(&alarm_list)) != NULL && + cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + } else { + /* If calling from other context, mark that alarm is executing + * so loop can spin till it finish. Otherwise we are trying to + * cancel our self - mark it by EINPROGRESS */ + if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + + break; + } + } + ap_prev = ap; + + /* now go through list, removing entries not at start */ + LIST_FOREACH(ap, &alarm_list, next) { + /* this won't be true first time through */ + if (cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + ap = ap_prev; + } else if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + } + ap_prev = ap; + } + rte_spinlock_unlock(&alarm_list_lk); + } while (executing != 0); + + if (count == 0 && err == 0) + rte_errno = ENOENT; + else if (err) + rte_errno = err; + + return count; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c new file mode 100644 index 00000000..907fbfa7 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BACKTRACE_SIZE 256 + +/* dump the stack of the calling core */ +void rte_dump_stack(void) +{ + void *func[BACKTRACE_SIZE]; + char **symb = NULL; + int size; + + size = backtrace(func, BACKTRACE_SIZE); + symb = backtrace_symbols(func, size); + + if (symb == NULL) + return; + + while (size > 0) { + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, + "%d: [%s]\n", size, symb[size - 1]); + size --; + } + + free(symb); +} + +/* not implemented in this environment */ +void rte_dump_registers(void) +{ + return; +} + +/* call abort(), it will generate a coredump if enabled */ +void __rte_panic(const char *funcname, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + rte_dump_stack(); + rte_dump_registers(); + abort(); +} + +/* + * Like rte_panic this terminates the application. However, no traceback is + * provided and no core-dump is generated. + */ +void +rte_exit(int exit_code, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + if (exit_code != 0) + RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" + " Cause: ", exit_code); + + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + +#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR + exit(exit_code); +#else + rte_dump_stack(); + rte_dump_registers(); + abort(); +#endif +} diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c new file mode 100644 index 00000000..18858e2d --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -0,0 +1,365 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rte_string_fns.h" +#include "eal_internal_cfg.h" +#include "eal_hugepages.h" +#include "eal_filesystem.h" + +static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; + +/* this function is only called from eal_hugepage_info_init which itself + * is only called from a primary process */ +static uint32_t +get_num_hugepages(const char *subdir) +{ + char path[PATH_MAX]; + long unsigned resv_pages, num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + const char *nr_rsvd_file = "resv_hugepages"; + + /* first, check how many reserved pages kernel reports */ + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_rsvd_file); + if (eal_parse_sysfs_value(path, &resv_pages) < 0) + return 0; + + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* adjust num_pages */ + if (num_pages >= resv_pages) + num_pages -= resv_pages; + else if (resv_pages) + num_pages = 0; + + /* we want to return a uint32_t and more than this looks suspicious + * anyway ... */ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + +static uint64_t +get_default_hp_size(void) +{ + const char proc_meminfo[] = "/proc/meminfo"; + const char str_hugepagesz[] = "Hugepagesize:"; + unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1; + char buffer[256]; + unsigned long long size = 0; + + FILE *fd = fopen(proc_meminfo, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_meminfo); + while(fgets(buffer, sizeof(buffer), fd)){ + if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){ + size = rte_str_to_size(&buffer[hugepagesz_len]); + break; + } + } + fclose(fd); + if (size == 0) + rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo); + return size; +} + +static const char * +get_hugepage_dir(uint64_t hugepage_sz) +{ + enum proc_mount_fieldnames { + DEVICE = 0, + MOUNTPT, + FSTYPE, + OPTIONS, + _FIELDNAME_MAX + }; + static uint64_t default_size = 0; + const char proc_mounts[] = "/proc/mounts"; + const char hugetlbfs_str[] = "hugetlbfs"; + const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; + const char pagesize_opt[] = "pagesize="; + const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; + const char split_tok = ' '; + char *splitstr[_FIELDNAME_MAX]; + char buf[BUFSIZ]; + char *retval = NULL; + + FILE *fd = fopen(proc_mounts, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_mounts); + + if (default_size == 0) + default_size = get_default_hp_size(); + + while (fgets(buf, sizeof(buf), fd)){ + if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, + split_tok) != _FIELDNAME_MAX) { + RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); + break; /* return NULL */ + } + + /* we have a specified --huge-dir option, only examine that dir */ + if (internal_config.hugepage_dir != NULL && + strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) + continue; + + if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ + const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); + + /* if no explicit page size, the default page size is compared */ + if (pagesz_str == NULL){ + if (hugepage_sz == default_size){ + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + /* there is an explicit page size, so check it */ + else { + uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); + if (pagesz == hugepage_sz) { + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + } /* end if strncmp hugetlbfs */ + } /* end while fgets */ + + fclose(fd); + return retval; +} + +/* + * Clear the hugepage directory of whatever hugepage files + * there are. Checks if the file is locked (i.e. + * if it's in use by another DPDK process). + */ +static int +clear_hugedir(const char * hugedir) +{ + DIR *dir; + struct dirent *dirent; + int dir_fd, fd, lck_result; + const char filter[] = "*map_*"; /* matches hugepage files */ + + /* open directory */ + dir = opendir(hugedir); + if (!dir) { + RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n", + hugedir); + goto error; + } + dir_fd = dirfd(dir); + + dirent = readdir(dir); + if (!dirent) { + RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n", + hugedir); + goto error; + } + + while(dirent != NULL){ + /* skip files that don't match the hugepage pattern */ + if (fnmatch(filter, dirent->d_name, 0) > 0) { + dirent = readdir(dir); + continue; + } + + /* try and lock the file */ + fd = openat(dir_fd, dirent->d_name, O_RDONLY); + + /* skip to next file */ + if (fd == -1) { + dirent = readdir(dir); + continue; + } + + /* non-blocking lock */ + lck_result = flock(fd, LOCK_EX | LOCK_NB); + + /* if lock succeeds, unlock and remove the file */ + if (lck_result != -1) { + flock(fd, LOCK_UN); + unlinkat(dir_fd, dirent->d_name, 0); + } + close (fd); + dirent = readdir(dir); + } + + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + + RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n", + strerror(errno)); + + return -1; +} + +static int +compare_hpi(const void *a, const void *b) +{ + const struct hugepage_info *hpi_a = a; + const struct hugepage_info *hpi_b = b; + + return hpi_b->hugepage_sz - hpi_a->hugepage_sz; +} + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. + */ +int +eal_hugepage_info_init(void) +{ + const char dirent_start_text[] = "hugepages-"; + const size_t dirent_start_len = sizeof(dirent_start_text) - 1; + unsigned i, num_sizes = 0; + DIR *dir; + struct dirent *dirent; + + dir = opendir(sys_dir_path); + if (dir == NULL) + rte_panic("Cannot open directory %s to read system hugepage " + "info\n", sys_dir_path); + + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + struct hugepage_info *hpi; + + if (strncmp(dirent->d_name, dirent_start_text, + dirent_start_len) != 0) + continue; + + if (num_sizes >= MAX_HUGEPAGE_SIZES) + break; + + hpi = &internal_config.hugepage_info[num_sizes]; + hpi->hugepage_sz = + rte_str_to_size(&dirent->d_name[dirent_start_len]); + hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); + + /* first, check if we have a mountpoint */ + if (hpi->hugedir == NULL) { + uint32_t num_pages; + + num_pages = get_num_hugepages(dirent->d_name); + if (num_pages > 0) + RTE_LOG(NOTICE, EAL, + "%" PRIu32 " hugepages of size " + "%" PRIu64 " reserved, but no mounted " + "hugetlbfs found for that size\n", + num_pages, hpi->hugepage_sz); + continue; + } + + /* try to obtain a writelock */ + hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); + + /* if blocking lock failed */ + if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { + RTE_LOG(CRIT, EAL, + "Failed to lock hugepage directory!\n"); + break; + } + /* clear out the hugepages dir from unused pages */ + if (clear_hugedir(hpi->hugedir) == -1) + break; + + /* for now, put all pages into socket 0, + * later they will be sorted */ + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + +#ifndef RTE_ARCH_64 + /* for 32-bit systems, limit number of hugepages to + * 1GB per page size */ + hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], + RTE_PGSIZE_1G / hpi->hugepage_sz); +#endif + + num_sizes++; + } + closedir(dir); + + /* something went wrong, and we broke from the for loop above */ + if (dirent != NULL) + return -1; + + internal_config.num_hugepage_sizes = num_sizes; + + /* sort the page directory entries by size, largest to smallest */ + qsort(&internal_config.hugepage_info[0], num_sizes, + sizeof(internal_config.hugepage_info[0]), compare_hpi); + + /* now we have all info, check we have at least one valid size */ + for (i = 0; i < num_sizes; i++) + if (internal_config.hugepage_info[i].hugedir != NULL && + internal_config.hugepage_info[i].num_pages[0] > 0) + return 0; + + /* no valid hugepage mounts available, return error */ + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c new file mode 100644 index 00000000..06b26a9e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -0,0 +1,1224 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_vfio.h" +#include "eal_thread.h" + +#define EAL_INTR_EPOLL_WAIT_FOREVER (-1) +#define NB_OTHER_INTR 1 + +static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ + +/** + * union for pipe fds. + */ +union intr_pipefds{ + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; +}; + +/** + * union buffer for reading on different devices + */ +union rte_intr_read_buffer { + int uio_intr_count; /* for uio device */ +#ifdef VFIO_PRESENT + uint64_t vfio_intr_count; /* for vfio device */ +#endif + uint64_t timerfd_num; /* for timerfd */ + char charbuf[16]; /* for others */ +}; + +TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); +TAILQ_HEAD(rte_intr_source_list, rte_intr_source); + +struct rte_intr_callback { + TAILQ_ENTRY(rte_intr_callback) next; + rte_intr_callback_fn cb_fn; /**< callback address */ + void *cb_arg; /**< parameter for callback */ +}; + +struct rte_intr_source { + TAILQ_ENTRY(rte_intr_source) next; + struct rte_intr_handle intr_handle; /**< interrupt handle */ + struct rte_intr_cb_list callbacks; /**< user callbacks */ + uint32_t active; +}; + +/* global spinlock for interrupt data operation */ +static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; + +/* union buffer for pipe read/write */ +static union intr_pipefds intr_pipe; + +/* interrupt sources list */ +static struct rte_intr_source_list intr_sources; + +/* interrupt handling thread */ +static pthread_t intr_thread; + +/* VFIO interrupts */ +#ifdef VFIO_PRESENT + +#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) +/* irq set buffer length for queue interrupts and LSC interrupt */ +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ + sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) + +/* enable legacy (INTx) interrupts */ +static int +vfio_enable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable legacy (INTx) interrupts */ +static int +vfio_disable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } + return 0; +} + +/* enable MSI interrupts */ +static int +vfio_enable_msi(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + if (!intr_handle->max_intr) + intr_handle->max_intr = 1; + else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; + + irq_set->count = intr_handle->max_intr; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +/* disable MSI-X interrupts */ +static int +vfio_disable_msix(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); + + return ret; +} +#endif + +static int +uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* disable interrupts */ + command_high |= 0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* enable interrupts */ + command_high &= ~0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intr_disable(struct rte_intr_handle *intr_handle) +{ + const int value = 0; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +static int +uio_intr_enable(struct rte_intr_handle *intr_handle) +{ + const int value = 1; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +int +rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg) +{ + int ret, wake_thread; + struct rte_intr_source *src; + struct rte_intr_callback *callback; + + wake_thread = 0; + + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { + RTE_LOG(ERR, EAL, + "Registering with invalid input parameter\n"); + return -EINVAL; + } + + /* allocate a new interrupt callback entity */ + callback = rte_zmalloc("interrupt callback list", + sizeof(*callback), 0); + if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + return -ENOMEM; + } + callback->cb_fn = cb; + callback->cb_arg = cb_arg; + + rte_spinlock_lock(&intr_lock); + + /* check if there is at least one callback registered for the fd */ + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd == intr_handle->fd) { + /* we had no interrupts for this */ + if TAILQ_EMPTY(&src->callbacks) + wake_thread = 1; + + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + ret = 0; + break; + } + } + + /* no existing callbacks for this - add new source */ + if (src == NULL) { + if ((src = rte_zmalloc("interrupt source list", + sizeof(*src), 0)) == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + rte_free(callback); + ret = -ENOMEM; + } else { + src->intr_handle = *intr_handle; + TAILQ_INIT(&src->callbacks); + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + TAILQ_INSERT_TAIL(&intr_sources, src, next); + wake_thread = 1; + ret = 0; + } + } + + rte_spinlock_unlock(&intr_lock); + + /** + * check if need to notify the pipe fd waited by epoll_wait to + * rebuild the wait list. + */ + if (wake_thread) + if (write(intr_pipe.writefd, "1", 1) < 0) + return -EPIPE; + + return ret; +} + +int +rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_intr_source *src; + struct rte_intr_callback *cb, *next; + + /* do parameter checking first */ + if (intr_handle == NULL || intr_handle->fd < 0) { + RTE_LOG(ERR, EAL, + "Unregistering with invalid input parameter\n"); + return -EINVAL; + } + + rte_spinlock_lock(&intr_lock); + + /* check if the insterrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; + + /* No interrupt source registered for the fd */ + if (src == NULL) { + ret = -ENOENT; + + /* interrupt source has some active callbacks right now. */ + } else if (src->active != 0) { + ret = -EAGAIN; + + /* ok to remove. */ + } else { + ret = 0; + + /*walk through the callbacks and remove all that match. */ + for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || + cb->cb_arg == cb_arg)) { + TAILQ_REMOVE(&src->callbacks, cb, next); + rte_free(cb); + ret++; + } + } + + /* all callbacks for that source are removed. */ + if (TAILQ_EMPTY(&src->callbacks)) { + TAILQ_REMOVE(&intr_sources, src, next); + rte_free(src); + } + } + + rte_spinlock_unlock(&intr_lock); + + /* notify the pipe fd waited by epoll_wait to rebuild the wait list */ + if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) { + ret = -EPIPE; + } + + return ret; +} + +int +rte_intr_enable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to enable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_enable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_enable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_enable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int +rte_intr_disable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to disable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_disable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_disable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_disable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_disable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_disable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +static int +eal_intr_process_interrupts(struct epoll_event *events, int nfds) +{ + int n, bytes_read; + struct rte_intr_source *src; + struct rte_intr_callback *cb; + union rte_intr_read_buffer buf; + struct rte_intr_callback active_cb; + + for (n = 0; n < nfds; n++) { + + /** + * if the pipe fd is ready to read, return out to + * rebuild the wait list. + */ + if (events[n].data.fd == intr_pipe.readfd){ + int r = read(intr_pipe.readfd, buf.charbuf, + sizeof(buf.charbuf)); + RTE_SET_USED(r); + return -1; + } + rte_spinlock_lock(&intr_lock); + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == + events[n].data.fd) + break; + if (src == NULL){ + rte_spinlock_unlock(&intr_lock); + continue; + } + + /* mark this interrupt source as active and release the lock. */ + src->active = 1; + rte_spinlock_unlock(&intr_lock); + + /* set the length to be read dor different handle type */ + switch (src->intr_handle.type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; + case RTE_INTR_HANDLE_ALARM: + bytes_read = sizeof(buf.timerfd_num); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + case RTE_INTR_HANDLE_EXT: + default: + bytes_read = 1; + break; + } + + if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + bytes_read = read(events[n].data.fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", events[n].data.fd); + } + + /* grab a lock, again to call callbacks and update status. */ + rte_spinlock_lock(&intr_lock); + + if (bytes_read > 0) { + + /* Finally, call all callbacks. */ + TAILQ_FOREACH(cb, &src->callbacks, next) { + + /* make a copy and unlock. */ + active_cb = *cb; + rte_spinlock_unlock(&intr_lock); + + /* call the actual callback */ + active_cb.cb_fn(&src->intr_handle, + active_cb.cb_arg); + + /*get the lock back. */ + rte_spinlock_lock(&intr_lock); + } + } + + /* we done with that interrupt source, release it. */ + src->active = 0; + rte_spinlock_unlock(&intr_lock); + } + + return 0; +} + +/** + * It handles all the interrupts. + * + * @param pfd + * epoll file descriptor. + * @param totalfds + * The number of file descriptors added in epoll. + * + * @return + * void + */ +static void +eal_intr_handle_interrupts(int pfd, unsigned totalfds) +{ + struct epoll_event events[totalfds]; + int nfds = 0; + + for(;;) { + nfds = epoll_wait(pfd, events, totalfds, + EAL_INTR_EPOLL_WAIT_FOREVER); + /* epoll_wait fail */ + if (nfds < 0) { + if (errno == EINTR) + continue; + RTE_LOG(ERR, EAL, + "epoll_wait returns with fail\n"); + return; + } + /* epoll_wait timeout, will never happens here */ + else if (nfds == 0) + continue; + /* epoll_wait has at least one fd ready to read */ + if (eal_intr_process_interrupts(events, nfds) < 0) + return; + } +} + +/** + * It builds/rebuilds up the epoll file descriptor with all the + * file descriptors being waited on. Then handles the interrupts. + * + * @param arg + * pointer. (unused) + * + * @return + * never return; + */ +static __attribute__((noreturn)) void * +eal_intr_thread_main(__rte_unused void *arg) +{ + struct epoll_event ev; + + /* host thread, never break out */ + for (;;) { + /* build up the epoll fd with all descriptors we are to + * wait on then pass it to the handle_interrupts function + */ + static struct epoll_event pipe_event = { + .events = EPOLLIN | EPOLLPRI, + }; + struct rte_intr_source *src; + unsigned numfds = 0; + + /* create epoll fd */ + int pfd = epoll_create(1); + if (pfd < 0) + rte_panic("Cannot create epoll instance\n"); + + pipe_event.data.fd = intr_pipe.readfd; + /** + * add pipe fd into wait list, this pipe is used to + * rebuild the wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd, + &pipe_event) < 0) { + rte_panic("Error adding fd to %d epoll_ctl, %s\n", + intr_pipe.readfd, strerror(errno)); + } + numfds++; + + rte_spinlock_lock(&intr_lock); + + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->callbacks.tqh_first == NULL) + continue; /* skip those with no callbacks */ + ev.events = EPOLLIN | EPOLLPRI; + ev.data.fd = src->intr_handle.fd; + + /** + * add all the uio device file descriptor + * into wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, + src->intr_handle.fd, &ev) < 0){ + rte_panic("Error adding fd %d epoll_ctl, %s\n", + src->intr_handle.fd, strerror(errno)); + } + else + numfds++; + } + rte_spinlock_unlock(&intr_lock); + /* serve the interrupt */ + eal_intr_handle_interrupts(pfd, numfds); + + /** + * when we return, we need to rebuild the + * list of fds to monitor. + */ + close(pfd); + } +} + +int +rte_eal_intr_init(void) +{ + int ret = 0, ret_1 = 0; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* init the global interrupt source head */ + TAILQ_INIT(&intr_sources); + + /** + * create a pipe which will be waited by epoll and notified to + * rebuild the wait list of epoll. + */ + if (pipe(intr_pipe.pipefd) < 0) + return -1; + + /* create the host thread to wait/handle the interrupt */ + ret = pthread_create(&intr_thread, NULL, + eal_intr_thread_main, NULL); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to create thread for interrupt handling\n"); + } else { + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "eal-intr-thread"); + ret_1 = rte_thread_setname(intr_thread, thread_name); + if (ret_1 != 0) + RTE_LOG(ERR, EAL, + "Failed to set thread name for interrupt handling\n"); + } + + return -ret; +} + +static void +eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) +{ + union rte_intr_read_buffer buf; + int bytes_read = 1; + int nbytes; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + default: + bytes_read = 1; + RTE_LOG(INFO, EAL, "unexpected intr type\n"); + break; + } + + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + do { + nbytes = read(fd, &buf, bytes_read); + if (nbytes < 0) { + if (errno == EINTR || errno == EWOULDBLOCK || + errno == EAGAIN) + continue; + RTE_LOG(ERR, EAL, + "Error reading from fd %d: %s\n", + fd, strerror(errno)); + } else if (nbytes == 0) + RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd); + return; + } while (1); +} + +static int +eal_epoll_process_event(struct epoll_event *evs, unsigned int n, + struct rte_epoll_event *events) +{ + unsigned int i, count = 0; + struct rte_epoll_event *rev; + + for (i = 0; i < n; i++) { + rev = evs[i].data.ptr; + if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID, + RTE_EPOLL_EXEC)) + continue; + + events[count].status = RTE_EPOLL_VALID; + events[count].fd = rev->fd; + events[count].epfd = rev->epfd; + events[count].epdata.event = rev->epdata.event; + events[count].epdata.data = rev->epdata.data; + if (rev->epdata.cb_fun) + rev->epdata.cb_fun(rev->fd, + rev->epdata.cb_arg); + + rte_compiler_barrier(); + rev->status = RTE_EPOLL_VALID; + count++; + } + return count; +} + +static inline int +eal_init_tls_epfd(void) +{ + int pfd = epoll_create(255); + + if (pfd < 0) { + RTE_LOG(ERR, EAL, + "Cannot create epoll instance\n"); + return -1; + } + return pfd; +} + +int +rte_intr_tls_epfd(void) +{ + if (RTE_PER_LCORE(_epfd) == -1) + RTE_PER_LCORE(_epfd) = eal_init_tls_epfd(); + + return RTE_PER_LCORE(_epfd); +} + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + struct epoll_event evs[maxevents]; + int rc; + + if (!events) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + while (1) { + rc = epoll_wait(epfd, evs, maxevents, timeout); + if (likely(rc > 0)) { + /* epoll_wait has at least one fd ready to read */ + rc = eal_epoll_process_event(evs, rc, events); + break; + } else if (rc < 0) { + if (errno == EINTR) + continue; + /* epoll_wait fail */ + RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n", + strerror(errno)); + rc = -1; + break; + } else { + /* rc == 0, epoll_wait timed out */ + break; + } + } + + return rc; +} + +static inline void +eal_epoll_data_safe_free(struct rte_epoll_event *ev) +{ + while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID, + RTE_EPOLL_INVALID)) + while (ev->status != RTE_EPOLL_VALID) + rte_pause(); + memset(&ev->epdata, 0, sizeof(ev->epdata)); + ev->fd = -1; + ev->epfd = -1; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event) +{ + struct epoll_event ev; + + if (!event) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + if (op == EPOLL_CTL_ADD) { + event->status = RTE_EPOLL_VALID; + event->fd = fd; /* ignore fd in event */ + event->epfd = epfd; + ev.data.ptr = (void *)event; + } + + ev.events = event->epdata.event; + if (epoll_ctl(epfd, op, fd, &ev) < 0) { + RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", + op, fd, strerror(errno)); + if (op == EPOLL_CTL_ADD) + /* rollback status when CTL_ADD fail */ + event->status = RTE_EPOLL_INVALID; + return -1; + } + + if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID) + eal_epoll_data_safe_free(event); + + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, + int op, unsigned int vec, void *data) +{ + struct rte_epoll_event *rev; + struct rte_epoll_data *epdata; + int epfd_op; + unsigned int efd_idx; + int rc = 0; + + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? + (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + + if (!intr_handle || intr_handle->nb_efd == 0 || + efd_idx >= intr_handle->nb_efd) { + RTE_LOG(ERR, EAL, "Wrong intr vector number.\n"); + return -EPERM; + } + + switch (op) { + case RTE_INTR_EVENT_ADD: + epfd_op = EPOLL_CTL_ADD; + rev = &intr_handle->elist[efd_idx]; + if (rev->status != RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event already been added.\n"); + return -EEXIST; + } + + /* attach to intr vector fd */ + epdata = &rev->epdata; + epdata->event = EPOLLIN | EPOLLPRI | EPOLLET; + epdata->data = data; + epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr; + epdata->cb_arg = (void *)intr_handle; + rc = rte_epoll_ctl(epfd, epfd_op, + intr_handle->efds[efd_idx], rev); + if (!rc) + RTE_LOG(DEBUG, EAL, + "efd %d associated with vec %d added on epfd %d" + "\n", rev->fd, vec, epfd); + else + rc = -EPERM; + break; + case RTE_INTR_EVENT_DEL: + epfd_op = EPOLL_CTL_DEL; + rev = &intr_handle->elist[efd_idx]; + if (rev->status == RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event does not exist.\n"); + return -EPERM; + } + + rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev); + if (rc) + rc = -EPERM; + break; + default: + RTE_LOG(ERR, EAL, "event op type mismatch\n"); + rc = -EPERM; + } + + return rc; +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + uint32_t i; + int fd; + uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + + assert(nb_efd != 0); + + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { + for (i = 0; i < n; i++) { + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, + "can't setup eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + intr_handle->efds[i] = fd; + } + intr_handle->nb_efd = n; + intr_handle->max_intr = NB_OTHER_INTR + n; + } else { + intr_handle->efds[0] = intr_handle->fd; + intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); + intr_handle->max_intr = NB_OTHER_INTR; + } + + return 0; +} + +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } + + if (intr_handle->max_intr > intr_handle->nb_efd) { + for (i = 0; i < intr_handle->nb_efd; i++) + close(intr_handle->efds[i]); + } + intr_handle->nb_efd = 0; + intr_handle->max_intr = 0; +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + return !(!intr_handle->nb_efd); +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + if (!rte_intr_dp_is_en(intr_handle)) + return 1; + else + return !!(intr_handle->max_intr - intr_handle->nb_efd); +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) + return 1; + + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c new file mode 100644 index 00000000..07aec694 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -0,0 +1,955 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_private.h" + +#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 +#define PCI_DEVICE_ID_IVSHMEM 0x1110 + +#define IVSHMEM_MAGIC 0x0BADC0DE + +#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" +#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" + +#define PHYS 0x1 +#define VIRT 0x2 +#define IOREMAP 0x4 +#define FULL (PHYS|VIRT|IOREMAP) + +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define CONTAINS(x,y)\ + (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +struct ivshmem_pci_device { + char path[PATH_MAX]; + phys_addr_t ioremap_addr; +}; + +/* data type to store in config */ +struct ivshmem_segment { + struct rte_ivshmem_metadata_entry entry; + uint64_t align; + char path[PATH_MAX]; +}; +struct ivshmem_shared_config { + struct ivshmem_segment segment[RTE_MAX_MEMSEG]; + uint32_t segment_idx; + struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; + uint32_t pci_devs_idx; +}; +static struct ivshmem_shared_config * ivshmem_config; +static int memseg_idx; +static int pagesz; + +/* Tailq heads to add rings to */ +TAILQ_HEAD(rte_ring_list, rte_tailq_entry); + +/* + * Utility functions + */ + +static int +is_ivshmem_device(struct rte_pci_device * dev) +{ + return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM + && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM; +} + +static void * +map_metadata(int fd, uint64_t len) +{ + size_t metadata_len = sizeof(struct rte_ivshmem_metadata); + size_t aligned_len = METADATA_SIZE_ALIGNED; + + return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, len - aligned_len); +} + +static void +unmap_metadata(void * ptr) +{ + munmap(ptr, sizeof(struct rte_ivshmem_metadata)); +} + +static int +has_ivshmem_metadata(int fd, uint64_t len) +{ + struct rte_ivshmem_metadata metadata; + void * ptr; + + ptr = map_metadata(fd, len); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + return metadata.magic_number == IVSHMEM_MAGIC; +} + +static void +remove_segment(struct ivshmem_segment * ms, int len, int idx) +{ + int i; + + for (i = idx; i < len - 1; i++) + memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); + memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); +} + +static int +overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check for overlap in virtual addresses */ + if (start1 > start2 && start1 < end2) + result |= VIRT; + if (start2 >= start1 && start2 < end1) + result |= VIRT; + + /* check for overlap in physical addresses */ + if (p_start1 > p_start2 && p_start1 < p_end2) + result |= PHYS; + if (p_start2 > p_start1 && p_start2 < p_end1) + result |= PHYS; + + /* check for overlap in ioremap addresses */ + if (i_start1 > i_start2 && i_start1 < i_end2) + result |= IOREMAP; + if (i_start2 > i_start1 && i_start2 < i_end1) + result |= IOREMAP; + + return result; +} + +static int +adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check if segments are virtually adjacent */ + if (start1 == end2) + result |= VIRT; + if (start2 == end1) + result |= VIRT; + + /* check if segments are physically adjacent */ + if (p_start1 == p_end2) + result |= PHYS; + if (p_start2 == p_end1) + result |= PHYS; + + /* check if segments are ioremap-adjacent */ + if (i_start1 == i_end2) + result |= IOREMAP; + if (i_start2 == i_end1) + result |= IOREMAP; + + return result; +} + +static int +has_adjacent_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) { + /* we're only interested in fully adjacent segments; partially + * adjacent segments can coexist. + */ + if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL) + return 1; + } + return 0; +} + +static int +has_overlapping_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) + if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) + return 1; + return 0; +} + +static int +seg_compare(const void * a, const void * b) +{ + const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; + const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; + + /* move unallocated zones to the end */ + if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) + return 0; + if (s1->entry.mz.addr == 0) + return 1; + if (s2->entry.mz.addr == 0) + return -1; + + return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; +} + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG +static void +entry_dump(struct rte_ivshmem_metadata_entry *e) +{ + RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, + RTE_PTR_ADD(e->mz.addr, e->mz.len)); + RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.phys_addr, + e->mz.phys_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.ioremap_addr, + e->mz.ioremap_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); + RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); +} +#endif + + + +/* + * Actual useful code + */ + +/* read through metadata mapped from the IVSHMEM device */ +static int +read_metadata(char * path, int path_len, int fd, uint64_t flen) +{ + struct rte_ivshmem_metadata metadata; + struct rte_ivshmem_metadata_entry * entry; + int idx, i; + void * ptr; + + ptr = map_metadata(fd, flen); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); + + idx = ivshmem_config->segment_idx; + + for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && + idx <= RTE_MAX_MEMSEG; i++) { + + if (idx == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); + return -1; + } + + entry = &metadata.entry[i]; + + /* stop on uninitialized memzone */ + if (entry->mz.len == 0) + break; + + /* copy metadata entry */ + memcpy(&ivshmem_config->segment[idx].entry, entry, + sizeof(struct rte_ivshmem_metadata_entry)); + + /* copy path */ + snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); + + idx++; + } + ivshmem_config->segment_idx = idx; + + return 0; +} + +/* check through each segment and look for adjacent or overlapping ones. */ +static int +cleanup_segments(struct ivshmem_segment * ms, int tbl_len) +{ + struct ivshmem_segment * s, * tmp; + int i, j, concat, seg_adjacent, seg_overlapping; + uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; + + qsort(ms, tbl_len, sizeof(struct ivshmem_segment), + seg_compare); + + while (has_overlapping_segments(ms, tbl_len) || + has_adjacent_segments(ms, tbl_len)) { + + for (i = 0; i < tbl_len; i++) { + s = &ms[i]; + + concat = 0; + + for (j = i + 1; j < tbl_len; j++) { + tmp = &ms[j]; + + /* check if this segment is overlapping with existing segment, + * or is adjacent to existing segment */ + seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); + seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); + + /* check if segments fully overlap or are fully adjacent */ + if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + + start1 = s->entry.mz.addr_64; + start2 = tmp->entry.mz.addr_64; + p_start1 = s->entry.mz.phys_addr; + p_start2 = tmp->entry.mz.phys_addr; + i_start1 = s->entry.mz.ioremap_addr; + i_start2 = tmp->entry.mz.ioremap_addr; + end1 = s->entry.mz.addr_64 + s->entry.mz.len; + end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; + + /* settle for minimum start address and maximum length */ + s->entry.mz.addr_64 = RTE_MIN(start1, start2); + s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); + s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); + s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); + s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; + concat = 1; + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); + entry_dump(&s->entry); + +#endif + } + /* if segments not fully overlap, we have an error condition. + * adjacent segments can coexist. + */ + else if (seg_overlapping > 0) { + RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + return -1; + } + if (concat) + break; + } + /* if we concatenated, remove segment at j */ + if (concat) { + remove_segment(ms, tbl_len, j); + tbl_len--; + break; + } + } + } + + return tbl_len; +} + +static int +create_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_CREAT | O_RDWR, 0600); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the file is locked, we have a problem */ + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); + close(fd); + return -1; + } + + if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { + RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); + return -1; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); + + /* change the exclusive lock we got earlier to a shared lock */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* open shared config file and, if present, map the config. + * having no config file is not an error condition, as we later check if + * ivshmem_config is NULL (if it is, that means nothing was mapped). */ +static int +open_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_RDONLY); + + /* if the file doesn't exist, just return success */ + if (fd < 0 && errno == ENOENT) + return 0; + /* else we have an error condition */ + else if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", + path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the lock *does* succeed, this means it's a + * stray config file, so it should be deleted. + */ + if (flock(fd, LOCK_EX | LOCK_NB) != -1) { + + /* if we can't remove the file, something is wrong */ + if (unlink(path) < 0) { + RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, + strerror(errno)); + return -1; + } + + /* release the lock */ + flock(fd, LOCK_UN); + close(fd); + + /* return success as having a stray config file is equivalent to not + * having config file at all. + */ + return 0; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + /* place a shared lock on config file */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* + * This function does the following: + * + * 1) Builds a table of ivshmem_segments with proper offset alignment + * 2) Cleans up that table so that we don't have any overlapping or adjacent + * memory segments + * 3) Creates memsegs from this table and maps them into memory. + */ +static inline int +map_all_segments(void) +{ + struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; + struct ivshmem_pci_device * pci_dev; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + int fd, fd_zero; + unsigned i, j; + struct rte_memzone mz; + struct rte_memseg ms; + void * base_addr; + uint64_t align, len; + phys_addr_t ioremap_addr; + + ioremap_addr = 0; + + memset(ms_tbl, 0, sizeof(ms_tbl)); + memset(&mz, 0, sizeof(struct rte_memzone)); + memset(&ms, 0, sizeof(struct rte_memseg)); + + /* first, build a table of memsegs to map, to avoid failed mmaps due to + * overlaps + */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { + if (i == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Too many segments requested!\n"); + return -1; + } + + seg = &ivshmem_config->segment[i]; + + /* copy segment to table */ + memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); + + /* find ioremap addr */ + for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { + pci_dev = &ivshmem_config->pci_devs[j]; + if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { + ioremap_addr = pci_dev->ioremap_addr; + break; + } + } + if (ioremap_addr == 0) { + RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); + return -1; + } + + /* work out alignments */ + align = seg->entry.mz.addr_64 - + RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); + len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); + + /* save original alignments */ + ms_tbl[i].align = align; + + /* create a memory zone */ + mz.addr_64 = seg->entry.mz.addr_64 - align; + mz.len = len; + mz.hugepage_sz = seg->entry.mz.hugepage_sz; + mz.phys_addr = seg->entry.mz.phys_addr - align; + + /* find true physical address */ + mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; + + ms_tbl[i].entry.offset = seg->entry.offset - align; + + memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); + } + + /* clean up the segments */ + memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); + + if (memseg_idx < 0) + return -1; + + mcfg = rte_eal_get_configuration()->mem_config; + + fd_zero = open("/dev/zero", O_RDWR); + + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); + return -1; + } + + /* create memsegs and put them into DPDK memory */ + for (i = 0; i < (unsigned) memseg_idx; i++) { + + seg = &ms_tbl[i]; + + ms.addr_64 = seg->entry.mz.addr_64; + ms.hugepage_sz = seg->entry.mz.hugepage_sz; + ms.len = seg->entry.mz.len; + ms.nchannel = rte_memory_get_nchannel(); + ms.nrank = rte_memory_get_nrank(); + ms.phys_addr = seg->entry.mz.phys_addr; + ms.ioremap_addr = seg->entry.mz.ioremap_addr; + ms.socket_id = seg->entry.mz.socket_id; + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); + return -1; + } + + fd = open(seg->path, O_RDWR); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, + strerror(errno)); + return -1; + } + + munmap(ms.addr, ms.len); + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, + seg->entry.offset); + + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map segment into memory: " + "expected %p got %p (%s)\n", ms.addr, base_addr, + strerror(errno)); + return -1; + } + + RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " + "offset 0x%" PRIx64 "\n", + ms.addr, ms.len, seg->entry.offset); + + /* put the pointers back into their real positions using original + * alignment */ + ms.addr_64 += seg->align; + ms.phys_addr += seg->align; + ms.ioremap_addr += seg->align; + ms.len -= seg->align; + + /* at this point, the rest of DPDK memory is not initialized, so we + * expect memsegs to be empty */ + memcpy(&mcfg->memseg[i], &ms, + sizeof(struct rte_memseg)); + + close(fd); + + RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", + ms.len); + } + + return 0; +} + +/* this happens at a later stage, after general EAL memory initialization */ +int +rte_eal_ivshmem_obj_init(void) +{ + struct rte_ring_list* ring_list = NULL; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + struct rte_memzone * mz; + struct rte_ring * r; + struct rte_tailq_entry *te; + unsigned i, ms, idx; + uint64_t offset; + + /* secondary process would not need any object discovery - it'll all + * already be in shared config */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) + return 0; + + /* check that we have an initialised ring tail queue */ + ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); + if (ring_list == NULL) { + RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); + return -1; + } + + mcfg = rte_eal_get_configuration()->mem_config; + + /* create memzones */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { + + seg = &ivshmem_config->segment[i]; + + /* add memzone */ + if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { + RTE_LOG(ERR, EAL, "No more memory zones available!\n"); + return -1; + } + + idx = mcfg->memzone_cnt; + + RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", + seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); + + memcpy(&mcfg->memzone[idx], &seg->entry.mz, + sizeof(struct rte_memzone)); + + /* find ioremap address */ + for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { + if (ms == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); + return -1; + } + if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { + offset = mcfg->memzone[idx].addr_64 - + mcfg->memseg[ms].addr_64; + mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + + offset; + break; + } + } + + mcfg->memzone_cnt++; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find rings */ + for (i = 0; i < mcfg->memzone_cnt; i++) { + mz = &mcfg->memzone[i]; + + /* check if memzone has a ring prefix */ + if (strncmp(mz->name, RTE_RING_MZ_PREFIX, + sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) + continue; + + r = (struct rte_ring*) (mz->addr_64); + + te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); + return -1; + } + + te->data = (void *) r; + + TAILQ_INSERT_TAIL(ring_list, te, next); + + RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); + } + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + rte_memzone_dump(stdout); + rte_ring_list_dump(stdout); +#endif + + return 0; +} + +/* initialize ivshmem structures */ +int rte_eal_ivshmem_init(void) +{ + struct rte_pci_device * dev; + struct rte_pci_resource * res; + int fd, ret; + char path[PATH_MAX]; + + /* initialize everything to 0 */ + memset(path, 0, sizeof(path)); + ivshmem_config = NULL; + + pagesz = getpagesize(); + + RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + + if (open_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); + return -1; + } + } + else { + + TAILQ_FOREACH(dev, &pci_device_list, next) { + + if (is_ivshmem_device(dev)) { + + /* IVSHMEM memory is always on BAR2 */ + res = &dev->mem_resource[2]; + + /* if we don't have a BAR2 */ + if (res->len == 0) + continue; + + /* construct pci device path */ + snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, + dev->addr.domain, dev->addr.bus, dev->addr.devid, + dev->addr.function); + + /* try to find memseg */ + fd = open(path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", path); + return -1; + } + + /* check if it's a DPDK IVSHMEM device */ + ret = has_ivshmem_metadata(fd, res->len); + + /* is DPDK device */ + if (ret == 1) { + + /* config file creation is deferred until the first + * DPDK device is found. then, it has to be created + * only once. */ + if (ivshmem_config == NULL && + create_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); + close(fd); + return -1; + } + + if (read_metadata(path, sizeof(path), fd, res->len) < 0) { + RTE_LOG(ERR, EAL, "Could not read metadata from" + " device %02x:%02x.%x!\n", dev->addr.bus, + dev->addr.devid, dev->addr.function); + close(fd); + return -1; + } + + if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { + RTE_LOG(WARNING, EAL, + "IVSHMEM PCI device limit exceeded. Increase " + "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " + "your config file.\n"); + break; + } + + RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; + snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, + sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), + "%s", path); + + ivshmem_config->pci_devs_idx++; + } + /* failed to read */ + else if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", + strerror(errno)); + close(fd); + return -1; + } + /* not a DPDK device */ + else + RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); + + /* close the BAR fd */ + close(fd); + } + } + } + + /* ivshmem_config is not NULL only if config was created and/or mapped */ + if (ivshmem_config) { + if (map_all_segments() < 0) { + RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); + return -1; + } + } + else { + RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n"); + } + + return 0; +} + +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_lcore.c b/lib/librte_eal/linuxapp/eal/eal_lcore.c new file mode 100644 index 00000000..de5b4260 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_lcore.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_filesystem.h" +#include "eal_thread.h" + +#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u" +#define CORE_ID_FILE "topology/core_id" +#define NUMA_NODE_PATH "/sys/devices/system/node" + +/* Check if a cpu is present by the presence of the cpu information for it */ +int +eal_cpu_detected(unsigned lcore_id) +{ + char path[PATH_MAX]; + int len = snprintf(path, sizeof(path), SYS_CPU_DIR + "/"CORE_ID_FILE, lcore_id); + if (len <= 0 || (unsigned)len >= sizeof(path)) + return 0; + if (access(path, F_OK) != 0) + return 0; + + return 1; +} + +/* + * Get CPU socket id (NUMA node) for a logical core. + * + * This searches each nodeX directories in /sys for the symlink for the given + * lcore_id and returns the numa node where the lcore is found. If lcore is not + * found on any numa node, returns zero. + */ +unsigned +eal_cpu_socket_id(unsigned lcore_id) +{ + unsigned socket; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH, + socket, lcore_id); + if (access(path, F_OK) == 0) + return socket; + } + return 0; +} + +/* Get the cpu core id value from the /sys/.../cpuX core_id value */ +unsigned +eal_cpu_core_id(unsigned lcore_id) +{ + char path[PATH_MAX]; + unsigned long id; + + int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE); + if (len <= 0 || (unsigned)len >= sizeof(path)) + goto err; + if (eal_parse_sysfs_value(path, &id) != 0) + goto err; + return (unsigned)id; + +err: + RTE_LOG(ERR, EAL, "Error reading core id value from %s " + "for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c new file mode 100644 index 00000000..0b133c3e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +/* + * default log function, used once mempool (hence log history) is + * available + */ +static ssize_t +console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) +{ + char copybuf[BUFSIZ + 1]; + ssize_t ret; + uint32_t loglevel; + + /* add this log in history */ + rte_log_add_in_history(buf, size); + + /* write on stdout */ + ret = fwrite(buf, 1, size, stdout); + fflush(stdout); + + /* truncate message if too big (should not happen) */ + if (size > BUFSIZ) + size = BUFSIZ; + + /* Syslog error levels are from 0 to 7, so subtract 1 to convert */ + loglevel = rte_log_cur_msg_loglevel() - 1; + memcpy(copybuf, buf, size); + copybuf[size] = '\0'; + + /* write on syslog too */ + syslog(loglevel, "%s", copybuf); + + return ret; +} + +static cookie_io_functions_t console_log_func = { + .write = console_log_write, +}; + +/* + * set the log to default function, called during eal init process, + * once memzones are available. + */ +int +rte_eal_log_init(const char *id, int facility) +{ + FILE *log_stream; + + log_stream = fopencookie(NULL, "w+", console_log_func); + if (log_stream == NULL) + return -1; + + openlog(id, LOG_NDELAY | LOG_PID, facility); + + if (rte_eal_common_log_init(log_stream) < 0) + return -1; + + return 0; +} + +/* early logs */ + +/* + * early log function, used during boot when mempool (hence log + * history) is not available + */ +static ssize_t +early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) +{ + ssize_t ret; + ret = fwrite(buf, size, 1, stdout); + fflush(stdout); + if (ret == 0) + return -1; + return ret; +} + +static cookie_io_functions_t early_log_func = { + .write = early_log_write, +}; +static FILE *early_log_stream; + +/* + * init the log library, called by rte_eal_init() to enable early + * logs + */ +int +rte_eal_log_early_init(void) +{ + early_log_stream = fopencookie(NULL, "w+", early_log_func); + if (early_log_stream == NULL) { + printf("Cannot configure early_log_stream\n"); + return -1; + } + rte_openlog_stream(early_log_stream); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c new file mode 100644 index 00000000..5b9132c6 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -0,0 +1,1559 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" + +#ifdef RTE_LIBRTE_XEN_DOM0 +int rte_xen_dom0_supported(void) +{ + return internal_config.xen_dom0_support; +} +#endif + +/** + * @file + * Huge page mapping under linux + * + * To reserve a big contiguous amount of memory, we use the hugepage + * feature of linux. For that, we need to have hugetlbfs mounted. This + * code will create many files in this directory (one per page) and + * map them in virtual memory. For each page, we will retrieve its + * physical address and remap it in order to have a virtual contiguous + * zone as well as a physical contiguous zone. + */ + +static uint64_t baseaddr_offset; + +static unsigned proc_pagemap_readable; + +#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" + +static void +test_proc_pagemap_readable(void) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + + if (fd < 0) { + RTE_LOG(ERR, EAL, + "Cannot open /proc/self/pagemap: %s. " + "virt2phys address translation will not work\n", + strerror(errno)); + return; + } + + /* Is readable */ + close(fd); + proc_pagemap_readable = 1; +} + +/* Lock page in physical memory and prevent from swapping. */ +int +rte_mem_lock_page(const void *virt) +{ + unsigned long virtual = (unsigned long)virt; + int page_size = getpagesize(); + unsigned long aligned = (virtual & ~ (page_size - 1)); + return mlock((void*)aligned, page_size); +} + +/* + * Get physical address of any mapped virtual address in the current process. + */ +phys_addr_t +rte_mem_virt2phy(const void *virtaddr) +{ + int fd; + uint64_t page, physaddr; + unsigned long virt_pfn; + int page_size; + off_t offset; + + /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ + if (!proc_pagemap_readable) + return RTE_BAD_PHYS_ADDR; + + /* standard page size */ + page_size = getpagesize(); + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + return RTE_BAD_PHYS_ADDR; + } + + virt_pfn = (unsigned long)virtaddr / page_size; + offset = sizeof(uint64_t) * virt_pfn; + if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { + RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return RTE_BAD_PHYS_ADDR; + } + if (read(fd, &page, sizeof(uint64_t)) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return RTE_BAD_PHYS_ADDR; + } + + /* + * the pfn (page frame number) are bits 0-54 (see + * pagemap.txt in linux Documentation) + */ + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + + ((unsigned long)virtaddr % page_size); + close(fd); + return physaddr; +} + +/* + * For each hugepage in hugepg_tbl, fill the physaddr value. We find + * it by browsing the /proc/self/pagemap special file. + */ +static int +find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va); + if (addr == RTE_BAD_PHYS_ADDR) + return -1; + hugepg_tbl[i].physaddr = addr; + } + return 0; +} + +/* + * Check whether address-space layout randomization is enabled in + * the kernel. This is important for multi-process as it can prevent + * two processes mapping data to the same virtual address + * Returns: + * 0 - address space randomization disabled + * 1/2 - address space randomization enabled + * negative error code on error + */ +static int +aslr_enabled(void) +{ + char c; + int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY); + if (fd < 0) + return -errno; + retval = read(fd, &c, 1); + close(fd); + if (retval < 0) + return -errno; + if (retval == 0) + return -EIO; + switch (c) { + case '0' : return 0; + case '1' : return 1; + case '2' : return 2; + default: return -EINVAL; + } +} + +/* + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by hugepage_sz until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of hugepage size. + */ +static void * +get_virtual_area(size_t *size, size_t hugepage_sz) +{ + void *addr; + int fd; + long aligned_addr; + + if (internal_config.base_virtaddr != 0) { + addr = (void*) (uintptr_t) (internal_config.base_virtaddr + + baseaddr_offset); + } + else addr = NULL; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0){ + RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); + return NULL; + } + do { + addr = mmap(addr, + (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + *size -= hugepage_sz; + } while (addr == MAP_FAILED && *size > 0); + + if (addr == MAP_FAILED) { + close(fd); + RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", + strerror(errno)); + return NULL; + } + + munmap(addr, (*size) + hugepage_sz); + close(fd); + + /* align addr to a huge page size boundary */ + aligned_addr = (long)addr; + aligned_addr += (hugepage_sz - 1); + aligned_addr &= (~(hugepage_sz - 1)); + addr = (void *)(aligned_addr); + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + addr, *size); + + /* increment offset */ + baseaddr_offset += *size; + + return addr; +} + +/* + * Mmap all hugepages of hugepage table: it first open a file in + * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the + * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored + * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to + * map continguous physical blocks in contiguous virtual blocks. + */ +static int +map_all_hugepages(struct hugepage_file *hugepg_tbl, + struct hugepage_info *hpi, int orig) +{ + int fd; + unsigned i; + void *virtaddr; + void *vma_addr = NULL; + size_t vma_len = 0; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + RTE_SET_USED(vma_len); +#endif + + for (i = 0; i < hpi->num_pages[0]; i++) { + uint64_t hugepage_sz = hpi->hugepage_sz; + + if (orig) { + hugepg_tbl[i].file_id = i; + hugepg_tbl[i].size = hugepage_sz; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, + sizeof(hugepg_tbl[i].filepath), hpi->hugedir, + hugepg_tbl[i].file_id); +#else + eal_get_hugefile_path(hugepg_tbl[i].filepath, + sizeof(hugepg_tbl[i].filepath), hpi->hugedir, + hugepg_tbl[i].file_id); +#endif + hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; + } +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G and 16G pages, just reuse + * original map address as final map address. + */ + else if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { + hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; + hugepg_tbl[i].orig_va = NULL; + continue; + } +#endif + +#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS + else if (vma_len == 0) { + unsigned j, num_pages; + + /* reserve a virtual area for next contiguous + * physical block: count the number of + * contiguous physical pages. */ + for (j = i+1; j < hpi->num_pages[0] ; j++) { +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in + * descending order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr + hugepage_sz) + break; +#endif + } + num_pages = j - i; + vma_len = num_pages * hugepage_sz; + + /* get the biggest virtual memory area up to + * vma_len. If it fails, vma_addr is NULL, so + * let the kernel provide the address. */ + vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); + if (vma_addr == NULL) + vma_len = hugepage_sz; + } +#endif + + /* try to create hugepage file */ + fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, + strerror(errno)); + return -1; + } + + /* map the segment, and populate page tables, + * the kernel fills this segment with zeros */ + virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (virtaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, + strerror(errno)); + close(fd); + return -1; + } + + if (orig) { + hugepg_tbl[i].orig_va = virtaddr; + } + else { + hugepg_tbl[i].final_va = virtaddr; + } + + /* set shared flock on the file. */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + __func__, strerror(errno)); + close(fd); + return -1; + } + + close(fd); + + vma_addr = (char *)vma_addr + hugepage_sz; + vma_len -= hugepage_sz; + } + return 0; +} + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + +/* + * Remaps all hugepages into single file segments + */ +static int +remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + int fd; + unsigned i = 0, j, num_pages, page_idx = 0; + void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; + size_t vma_len = 0; + size_t hugepage_sz = hpi->hugepage_sz; + size_t total_size, offset; + char filepath[MAX_HUGEPAGE_PATH]; + phys_addr_t physaddr; + int socket; + + while (i < hpi->num_pages[0]) { + +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G pages and 16G pages, + * just reuse original map address as final map address. + */ + if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { + hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; + hugepg_tbl[i].orig_va = NULL; + i++; + continue; + } +#endif + + /* reserve a virtual area for next contiguous + * physical block: count the number of + * contiguous physical pages. */ + for (j = i+1; j < hpi->num_pages[0] ; j++) { +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in descending + * order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr + hugepage_sz) + break; +#endif + } + num_pages = j - i; + vma_len = num_pages * hugepage_sz; + + socket = hugepg_tbl[i].socket_id; + + /* get the biggest virtual memory area up to + * vma_len. If it fails, vma_addr is NULL, so + * let the kernel provide the address. */ + vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); + + /* If we can't find a big enough virtual area, work out how many pages + * we are going to get */ + if (vma_addr == NULL) + j = i + 1; + else if (vma_len != num_pages * hugepage_sz) { + num_pages = vma_len / hugepage_sz; + j = i + num_pages; + + } + + hugepg_tbl[page_idx].file_id = page_idx; + eal_get_hugefile_path(filepath, + sizeof(filepath), + hpi->hugedir, + hugepg_tbl[page_idx].file_id); + + /* try to create hugepage file */ + fd = open(filepath, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); + return -1; + } + + total_size = 0; + for (;i < j; i++) { + + /* unmap current segment */ + if (total_size > 0) + munmap(vma_addr, total_size); + + /* unmap original page */ + munmap(hugepg_tbl[i].orig_va, hugepage_sz); + unlink(hugepg_tbl[i].filepath); + + total_size += hugepage_sz; + + old_addr = vma_addr; + + /* map new, bigger segment, and populate page tables, + * the kernel fills this segment with zeros */ + vma_addr = mmap(vma_addr, total_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + + if (vma_addr == MAP_FAILED || vma_addr != old_addr) { + RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); + close(fd); + return -1; + } + } + + /* set shared flock on the file. */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + __func__, strerror(errno)); + close(fd); + return -1; + } + + snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", + filepath); + + physaddr = rte_mem_virt2phy(vma_addr); + + if (physaddr == RTE_BAD_PHYS_ADDR) + return -1; + + hugepg_tbl[page_idx].final_va = vma_addr; + + hugepg_tbl[page_idx].physaddr = physaddr; + + hugepg_tbl[page_idx].repeated = num_pages; + + hugepg_tbl[page_idx].socket_id = socket; + + close(fd); + + /* verify the memory segment - that is, check that every VA corresponds + * to the physical address we expect to see + */ + for (offset = 0; offset < vma_len; offset += hugepage_sz) { + uint64_t expected_physaddr; + + expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; + page_addr = RTE_PTR_ADD(vma_addr, offset); + physaddr = rte_mem_virt2phy(page_addr); + + if (physaddr != expected_physaddr) { + RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " + "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 + " (expected 0x%" PRIx64 ")\n", + page_addr, offset, physaddr, expected_physaddr); + return -1; + } + } + + page_idx++; + } + + /* zero out the rest */ + memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); + return page_idx; +} +#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ + +/* Unmap all hugepages from original mapping */ +static int +unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + for (i = 0; i < hpi->num_pages[0]; i++) { + if (hugepg_tbl[i].orig_va) { + munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); + hugepg_tbl[i].orig_va = NULL; + } + } + return 0; +} +#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ + +/* + * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge + * page. + */ +static int +find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + int socket_id; + char *end, *nodestr; + unsigned i, hp_count = 0; + uint64_t virt_addr; + char buf[BUFSIZ]; + char hugedir_str[PATH_MAX]; + FILE *f; + + f = fopen("/proc/self/numa_maps", "r"); + if (f == NULL) { + RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps," + " consider that all memory is in socket_id 0\n"); + return 0; + } + + snprintf(hugedir_str, sizeof(hugedir_str), + "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); + + /* parse numa map */ + while (fgets(buf, sizeof(buf), f) != NULL) { + + /* ignore non huge page */ + if (strstr(buf, " huge ") == NULL && + strstr(buf, hugedir_str) == NULL) + continue; + + /* get zone addr */ + virt_addr = strtoull(buf, &end, 16); + if (virt_addr == 0 || end == buf) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + + /* get node id (socket id) */ + nodestr = strstr(buf, " N"); + if (nodestr == NULL) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + nodestr += 2; + end = strstr(nodestr, "="); + if (end == NULL) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + end[0] = '\0'; + end = NULL; + + socket_id = strtoul(nodestr, &end, 0); + if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + + /* if we find this page in our mappings, set socket_id */ + for (i = 0; i < hpi->num_pages[0]; i++) { + void *va = (void *)(unsigned long)virt_addr; + if (hugepg_tbl[i].orig_va == va) { + hugepg_tbl[i].socket_id = socket_id; + hp_count++; + } + } + } + + if (hp_count < hpi->num_pages[0]) + goto error; + + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +static int +cmp_physaddr(const void *a, const void *b) +{ +#ifndef RTE_ARCH_PPC_64 + const struct hugepage_file *p1 = (const struct hugepage_file *)a; + const struct hugepage_file *p2 = (const struct hugepage_file *)b; +#else + /* PowerPC needs memory sorted in reverse order from x86 */ + const struct hugepage_file *p1 = (const struct hugepage_file *)b; + const struct hugepage_file *p2 = (const struct hugepage_file *)a; +#endif + if (p1->physaddr < p2->physaddr) + return -1; + else if (p1->physaddr > p2->physaddr) + return 1; + else + return 0; +} + +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + void *retval; + int fd = open(filename, O_CREAT | O_RDWR, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +/* + * this copies *active* hugepages from one hugepage table to another. + * destination is typically the shared memory. + */ +static int +copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, + const struct hugepage_file * src, int src_size) +{ + int src_pos, dst_pos = 0; + + for (src_pos = 0; src_pos < src_size; src_pos++) { + if (src[src_pos].final_va != NULL) { + /* error on overflow attempt */ + if (dst_pos == dest_size) + return -1; + memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file)); + dst_pos++; + } + } + return 0; +} + +static int +unlink_hugepage_files(struct hugepage_file *hugepg_tbl, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += + internal_config.hugepage_info[size].num_pages[socket]; + + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + + if (hp->final_va != NULL && unlink(hp->filepath)) { + RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + } + } + return 0; +} + +/* + * unmaps hugepages that are not going to be used. since we originally allocate + * ALL hugepages (not just those we need), additional unmapping needs to be done. + */ +static int +unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, + struct hugepage_info *hpi, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += internal_config.hugepage_info[size].num_pages[socket]; + + for (size = 0; size < num_hp_info; size++) { + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + unsigned pages_found = 0; + + /* traverse until we have unmapped all the unused pages */ + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* if this page was already cleared */ + if (hp->final_va == NULL) + continue; +#endif + + /* find a page that matches the criteria */ + if ((hp->size == hpi[size].hugepage_sz) && + (hp->socket_id == (int) socket)) { + + /* if we skipped enough pages, unmap the rest */ + if (pages_found == hpi[size].num_pages[socket]) { + uint64_t unmap_len; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + unmap_len = hp->size * hp->repeated; +#else + unmap_len = hp->size; +#endif + + /* get start addr and len of the remaining segment */ + munmap(hp->final_va, (size_t) unmap_len); + + hp->final_va = NULL; + if (unlink(hp->filepath) == -1) { + RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + return -1; + } + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* else, check how much do we need to map */ + else { + int nr_pg_left = + hpi[size].num_pages[socket] - pages_found; + + /* if we need enough memory to fit into the segment */ + if (hp->repeated <= nr_pg_left) { + pages_found += hp->repeated; + } + /* truncate the segment */ + else { + uint64_t final_size = nr_pg_left * hp->size; + uint64_t seg_size = hp->repeated * hp->size; + + void * unmap_va = RTE_PTR_ADD(hp->final_va, + final_size); + int fd; + + munmap(unmap_va, seg_size - final_size); + + fd = open(hp->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + if (ftruncate(fd, final_size) < 0) { + RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + close(fd); + + pages_found += nr_pg_left; + hp->repeated = nr_pg_left; + } + } +#else + /* else, lock the page and skip */ + else + pages_found++; +#endif + + } /* match page */ + } /* foreach page */ + } /* foreach socket */ + } /* foreach pagesize */ + + return 0; +} + +static inline uint64_t +get_socket_mem_size(int socket) +{ + uint64_t size = 0; + unsigned i; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++){ + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) + size += hpi->hugepage_sz * hpi->num_pages[socket]; + } + + return size; +} + +/* + * This function is a NUMA-aware equivalent of calc_num_pages. + * It takes in the list of hugepage sizes and the + * number of pages thereof, and calculates the best number of + * pages of each size to fulfill the request for ram + */ +static int +calc_num_pages_per_socket(uint64_t * memory, + struct hugepage_info *hp_info, + struct hugepage_info *hp_used, + unsigned num_hp_info) +{ + unsigned socket, j, i = 0; + unsigned requested, available; + int total_num_pages = 0; + uint64_t remaining_mem, cur_mem; + uint64_t total_mem = internal_config.memory; + + if (num_hp_info == 0) + return -1; + + /* if specific memory amounts per socket weren't requested */ + if (internal_config.force_sockets == 0) { + int cpu_per_socket[RTE_MAX_NUMA_NODES]; + size_t default_size, total_size; + unsigned lcore_id; + + /* Compute number of cores per socket */ + memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); + RTE_LCORE_FOREACH(lcore_id) { + cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; + } + + /* + * Automatically spread requested memory amongst detected sockets according + * to number of cores from cpu mask present on each socket + */ + total_size = internal_config.memory; + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + + /* Set memory amount per socket */ + default_size = (internal_config.memory * cpu_per_socket[socket]) + / rte_lcore_count(); + + /* Limit to maximum available memory on socket */ + default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); + + /* Update sizes */ + memory[socket] = default_size; + total_size -= default_size; + } + + /* + * If some memory is remaining, try to allocate it by getting all + * available memory from sockets, one after the other + */ + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + /* take whatever is available */ + default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], + total_size); + + /* Update sizes */ + memory[socket] += default_size; + total_size -= default_size; + } + } + + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { + /* skips if the memory on specific socket wasn't requested */ + for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ + hp_used[i].hugedir = hp_info[i].hugedir; + hp_used[i].num_pages[socket] = RTE_MIN( + memory[socket] / hp_info[i].hugepage_sz, + hp_info[i].num_pages[socket]); + + cur_mem = hp_used[i].num_pages[socket] * + hp_used[i].hugepage_sz; + + memory[socket] -= cur_mem; + total_mem -= cur_mem; + + total_num_pages += hp_used[i].num_pages[socket]; + + /* check if we have met all memory requests */ + if (memory[socket] == 0) + break; + + /* check if we have any more pages left at this size, if so + * move on to next size */ + if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket]) + continue; + /* At this point we know that there are more pages available that are + * bigger than the memory we want, so lets see if we can get enough + * from other page sizes. + */ + remaining_mem = 0; + for (j = i+1; j < num_hp_info; j++) + remaining_mem += hp_info[j].hugepage_sz * + hp_info[j].num_pages[socket]; + + /* is there enough other memory, if not allocate another page and quit */ + if (remaining_mem < memory[socket]){ + cur_mem = RTE_MIN(memory[socket], + hp_info[i].hugepage_sz); + memory[socket] -= cur_mem; + total_mem -= cur_mem; + hp_used[i].num_pages[socket]++; + total_num_pages++; + break; /* we are done with this socket*/ + } + } + /* if we didn't satisfy all memory requirements per socket */ + if (memory[socket] > 0) { + /* to prevent icc errors */ + requested = (unsigned) (internal_config.socket_mem[socket] / + 0x100000); + available = requested - + ((unsigned) (memory[socket] / 0x100000)); + RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! " + "Requested: %uMB, available: %uMB\n", socket, + requested, available); + return -1; + } + } + + /* if we didn't satisfy total memory requirements */ + if (total_mem > 0) { + requested = (unsigned) (internal_config.memory / 0x100000); + available = requested - (unsigned) (total_mem / 0x100000); + RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB," + " available: %uMB\n", requested, available); + return -1; + } + return total_num_pages; +} + +/* + * Prepare physical memory mapping: fill configuration structure with + * these infos, return 0 on success. + * 1. map N huge pages in separate files in hugetlbfs + * 2. find associated physical addr + * 3. find associated NUMA socket ID + * 4. sort all huge pages by physical address + * 5. remap these N huge pages in the correct order + * 6. unmap the first mapping + * 7. fill memsegs in configuration with contiguous zones + */ +int +rte_eal_hugepage_init(void) +{ + struct rte_mem_config *mcfg; + struct hugepage_file *hugepage, *tmp_hp = NULL; + struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + + uint64_t memory[RTE_MAX_NUMA_NODES]; + + unsigned hp_offset; + int i, j, new_memseg; + int nr_hugefiles, nr_hugepages = 0; + void *addr; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int new_pages_count[MAX_HUGEPAGE_SIZES]; +#endif + + test_proc_pagemap_readable(); + + memset(used_hp, 0, sizeof(used_hp)); + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } + mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].addr = addr; + mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].len = internal_config.memory; + mcfg->memseg[0].socket_id = 0; + return 0; + } + +/* check if app runs on Xen Dom0 */ + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + /* use dom0_mm kernel driver to init memory */ + if (rte_xen_dom0_memory_init() < 0) + return -1; + else + return 0; +#endif + } + + /* calculate total number of hugepages available. at this point we haven't + * yet started sorting them so they all are on socket 0 */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ + used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; + + nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; + } + + /* + * allocate a memory area for hugepage table. + * this isn't shared memory yet. due to the fact that we need some + * processing done on these pages, shared memory will be created + * at a later stage. + */ + tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file)); + if (tmp_hp == NULL) + goto fail; + + memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file)); + + hp_offset = 0; /* where we start the current page size entries */ + + /* map all hugepages and sort them */ + for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + struct hugepage_info *hpi; + + /* + * we don't yet mark hugepages as used at this stage, so + * we just map all hugepages available to the system + * all hugepages are still located on socket 0 + */ + hpi = &internal_config.hugepage_info[i]; + + if (hpi->num_pages[0] == 0) + continue; + + /* map all hugepages available */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* find physical addresses and sockets for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + qsort(&tmp_hp[hp_offset], hpi->num_pages[0], + sizeof(struct hugepage_file), cmp_physaddr); + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* remap all hugepages into single file segments */ + new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); + if (new_pages_count[i] < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += new_pages_count[i]; +#else + /* remap all hugepages */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* unmap original mappings */ + if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0) + goto fail; + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += hpi->num_pages[0]; +#endif + } + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + nr_hugefiles = 0; + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + nr_hugefiles += new_pages_count[i]; + } +#else + nr_hugefiles = nr_hugepages; +#endif + + + /* clean out the numbers of pages */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) + internal_config.hugepage_info[i].num_pages[j] = 0; + + /* get hugepages for each socket */ + for (i = 0; i < nr_hugefiles; i++) { + int socket = tmp_hp[i].socket_id; + + /* find a hugepage info with right size and increment num_pages */ + const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES, + (int)internal_config.num_hugepage_sizes); + for (j = 0; j < nb_hpsizes; j++) { + if (tmp_hp[i].size == + internal_config.hugepage_info[j].hugepage_sz) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + internal_config.hugepage_info[j].num_pages[socket] += + tmp_hp[i].repeated; +#else + internal_config.hugepage_info[j].num_pages[socket]++; +#endif + } + } + } + + /* make a copy of socket_mem, needed for number of pages calculation */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + memory[i] = internal_config.socket_mem[i]; + + /* calculate final number of pages */ + nr_hugepages = calc_num_pages_per_socket(memory, + internal_config.hugepage_info, used_hp, + internal_config.num_hugepage_sizes); + + /* error if not enough memory available */ + if (nr_hugepages < 0) + goto fail; + + /* reporting in! */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + if (used_hp[i].num_pages[j] > 0) { + RTE_LOG(DEBUG, EAL, + "Requesting %u pages of size %uMB" + " from socket %i\n", + used_hp[i].num_pages[j], + (unsigned) + (used_hp[i].hugepage_sz / 0x100000), + j); + } + } + } + + /* create shared memory */ + hugepage = create_shared_memory(eal_hugepage_info_path(), + nr_hugefiles * sizeof(struct hugepage_file)); + + if (hugepage == NULL) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + goto fail; + } + memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file)); + + /* + * unmap pages that we won't need (looks at used_hp). + * also, sets final_va to NULL on pages that were unmapped. + */ + if (unmap_unneeded_hugepages(tmp_hp, used_hp, + internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n"); + goto fail; + } + + /* + * copy stuff from malloc'd hugepage* to the actual shared memory. + * this procedure only copies those hugepages that have final_va + * not NULL. has overflow protection. + */ + if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, + tmp_hp, nr_hugefiles) < 0) { + RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n"); + goto fail; + } + + /* free the hugepage backing files */ + if (internal_config.hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); + goto fail; + } + + /* free the temporary hugepage table */ + free(tmp_hp); + tmp_hp = NULL; + + /* find earliest free memseg - this is needed because in case of IVSHMEM, + * segments might have already been initialized */ + for (j = 0; j < RTE_MAX_MEMSEG; j++) + if (mcfg->memseg[j].addr == NULL) { + /* move to previous segment and exit loop */ + j--; + break; + } + + for (i = 0; i < nr_hugefiles; i++) { + new_memseg = 0; + + /* if this is a new section, create a new memseg */ + if (i == 0) + new_memseg = 1; + else if (hugepage[i].socket_id != hugepage[i-1].socket_id) + new_memseg = 1; + else if (hugepage[i].size != hugepage[i-1].size) + new_memseg = 1; + +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start from higher + * virtual address to lower address. Here, both the physical + * address and virtual address are in descending order */ + else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i-1].final_va - + (unsigned long)hugepage[i].final_va) != hugepage[i].size) + new_memseg = 1; +#else + else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i].final_va - + (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) + new_memseg = 1; +#endif + + if (new_memseg) { + j += 1; + if (j == RTE_MAX_MEMSEG) + break; + + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; +#else + mcfg->memseg[j].len = hugepage[i].size; +#endif + mcfg->memseg[j].socket_id = hugepage[i].socket_id; + mcfg->memseg[j].hugepage_sz = hugepage[i].size; + } + /* continuation of previous memseg */ + else { +#ifdef RTE_ARCH_PPC_64 + /* Use the phy and virt address of the last page as segment + * address for IBM Power architecture */ + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#endif + mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; + } + hugepage[i].memseg_id = j; + } + + if (i < nr_hugefiles) { + RTE_LOG(ERR, EAL, "Can only reserve %d pages " + "from %d requested\n" + "Current %s=%d is not enough\n" + "Please either increase it or request less amount " + "of memory.\n", + i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), + RTE_MAX_MEMSEG); + return -ENOMEM; + } + + return 0; + +fail: + free(tmp_hp); + return -1; +} + +/* + * uses fstat to report the size of a file on disk + */ +static off_t +getFileSize(int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return 0; + return st.st_size; +} + +/* + * This creates the memory mappings in the secondary process to match that of + * the server process. It goes through each memory segment in the DPDK runtime + * configuration and finds the hugepages which form that segment, mapping them + * in order to form a contiguous block in the virtual memory space + */ +int +rte_eal_hugepage_attach(void) +{ + const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + const struct hugepage_file *hp = NULL; + unsigned num_hp = 0; + unsigned i, s = 0; /* s used to track the segment number */ + off_t size; + int fd, fd_zero = -1, fd_hugepage = -1; + + if (aslr_enabled() > 0) { + RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " + "(ASLR) is enabled in the kernel.\n"); + RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory " + "into secondary processes\n"); + } + + test_proc_pagemap_readable(); + + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + if (rte_xen_dom0_memory_attach() < 0) { + RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " + "process\n"); + return -1; + } + return 0; +#endif + } + + fd_zero = open("/dev/zero", O_RDONLY); + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); + goto error; + } + fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + void *base_addr; + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) + continue; +#endif + + /* + * fdzero is mmapped to get a contiguous block of virtual + * addresses of the appropriate memseg size. + * use mmap to get identical addresses as the primary process. + */ + base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ, MAP_PRIVATE, fd_zero, 0); + if (base_addr == MAP_FAILED || + base_addr != mcfg->memseg[s].addr) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero to requested address [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + if (aslr_enabled() > 0) { + RTE_LOG(ERR, EAL, "It is recommended to " + "disable ASLR in the kernel " + "and retry running both primary " + "and secondary processes\n"); + } + goto error; + } + } + + size = getFileSize(fd_hugepage); + hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); + if (hp == NULL) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + goto error; + } + + num_hp = size / sizeof(struct hugepage_file); + RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); + + s = 0; + while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ + void *addr, *base_addr; + uintptr_t offset = 0; + size_t mapping_size; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) { + s++; + continue; + } +#endif + /* + * free previously mapped memory so we can map the + * hugepages into the space + */ + base_addr = mcfg->memseg[s].addr; + munmap(base_addr, mcfg->memseg[s].len); + + /* find the hugepages for this segment and map them + * we don't need to worry about order, as the server sorted the + * entries before it did the second mmap of them */ + for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){ + if (hp[i].memseg_id == (int)s){ + fd = open(hp[i].filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", + hp[i].filepath); + goto error; + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mapping_size = hp[i].size * hp[i].repeated; +#else + mapping_size = hp[i].size; +#endif + addr = mmap(RTE_PTR_ADD(base_addr, offset), + mapping_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); /* close file both on success and on failure */ + if (addr == MAP_FAILED || + addr != RTE_PTR_ADD(base_addr, offset)) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", + hp[i].filepath); + goto error; + } + offset+=mapping_size; + } + } + RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, + (unsigned long long)mcfg->memseg[s].len); + s++; + } + /* unmap the hugepage config file, since we are done using it */ + munmap((void *)(uintptr_t)hp, size); + close(fd_zero); + close(fd_hugepage); + return 0; + +error: + if (fd_zero >= 0) + close(fd_zero); + if (fd_hugepage >= 0) + close(fd_hugepage); + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c new file mode 100644 index 00000000..dbf12a84 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -0,0 +1,762 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" +#include "eal_pci_init.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in sysfs. + * When a registered device matches a driver, it is then initialized with + * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). + */ + +/* unbind kernel driver for this device */ +int +pci_unbind_kernel_driver(struct rte_pci_device *dev) +{ + int n; + FILE *f; + char filename[PATH_MAX]; + char buf[BUFSIZ]; + struct rte_pci_addr *loc = &dev->addr; + + /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ + snprintf(filename, sizeof(filename), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", + loc->domain, loc->bus, loc->devid, loc->function); + + f = fopen(filename, "w"); + if (f == NULL) /* device was not bound */ + return 0; + + n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", + loc->domain, loc->bus, loc->devid, loc->function); + if ((n < 0) || (n >= (int)sizeof(buf))) { + RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); + goto error; + } + if (fwrite(buf, n, 1, f) == 0) { + RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, + filename); + goto error; + } + + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +static int +pci_get_kernel_driver_by_path(const char *filename, char *dri_name) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !dri_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strncpy(dri_name, name + 1, strlen(name + 1) + 1); + return 0; + } + + return -1; +} + +/* Map pci device */ +int +rte_eal_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + ret = pci_vfio_map_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_eal_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: + RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); + const struct rte_memseg *last = seg; + unsigned i = 0; + + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr == NULL) + break; + + if (seg->addr > last->addr) + last = seg; + + } + return RTE_PTR_ADD(last->addr, last->len); +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; + union pci_resource_info { + struct { + char *phys_addr; + char *end_addr; + char *flags; + }; + char *ptrs[PCI_RESOURCE_FMT_NVAL]; + } res_info; + int i; + uint64_t phys_addr, end_addr, flags; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); + return -1; + } + + for (i = 0; imem_resource[i].phys_addr = phys_addr; + dev->mem_resource[i].len = end_addr - phys_addr + 1; + /* not mapped for now */ + dev->mem_resource[i].addr = NULL; + } + } + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +/* Scan one pci sysfs entry, and fill the devices list from it. */ +static int +pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, + uint8_t devid, uint8_t function) +{ + char filename[PATH_MAX]; + unsigned long tmp; + struct rte_pci_device *dev; + char driver[PATH_MAX]; + int ret; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return -1; + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = domain; + dev->addr.bus = bus; + dev->addr.devid = devid; + dev->addr.function = function; + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get subsystem_vendor id */ + snprintf(filename, sizeof(filename), "%s/subsystem_vendor", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_vendor_id = (uint16_t)tmp; + + /* get subsystem_device id */ + snprintf(filename, sizeof(filename), "%s/subsystem_device", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_device_id = (uint16_t)tmp; + + /* get max_vfs */ + dev->max_vfs = 0; + snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + else { + /* for non igb_uio driver, need kernel version >= 3.8 */ + snprintf(filename, sizeof(filename), + "%s/sriov_numvfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + } + + /* get numa node */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + if (access(filename, R_OK) != 0) { + /* if no NUMA support, set default to 0 */ + dev->numa_node = 0; + } else { + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->numa_node = tmp; + } + + /* parse resources */ + snprintf(filename, sizeof(filename), "%s/resource", dirname); + if (pci_parse_sysfs_resource(filename, dev) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); + free(dev); + return -1; + } + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); + free(dev); + return -1; + } + + if (!ret) { + if (!strcmp(driver, "vfio-pci")) + dev->kdrv = RTE_KDRV_VFIO; + else if (!strcmp(driver, "igb_uio")) + dev->kdrv = RTE_KDRV_IGB_UIO; + else if (!strcmp(driver, "uio_pci_generic")) + dev->kdrv = RTE_KDRV_UIO_GENERIC; + else + dev->kdrv = RTE_KDRV_UNKNOWN; + } else + dev->kdrv = RTE_KDRV_NONE; + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&pci_device_list)) { + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &pci_device_list, next) { + ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + + if (ret < 0) { + TAILQ_INSERT_BEFORE(dev2, dev, next); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memmove(dev2->mem_resource, dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + + return 0; +} + +/* + * split up a pci address into its constituent parts. + */ +static int +parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, + uint8_t *bus, uint8_t *devid, uint8_t *function) +{ + /* first split on ':' */ + union splitaddr { + struct { + char *domain; + char *bus; + char *devid; + char *function; + }; + char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ + } splitaddr; + + char *buf_copy = strndup(buf, bufsize); + if (buf_copy == NULL) + return -1; + + if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') + != PCI_FMT_NVAL - 1) + goto error; + /* final split is on '.' between devid and function */ + splitaddr.function = strchr(splitaddr.devid,'.'); + if (splitaddr.function == NULL) + goto error; + *splitaddr.function++ = '\0'; + + /* now convert to int values */ + errno = 0; + *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + if (errno != 0) + goto error; + + free(buf_copy); /* free the copy made with strdup */ + return 0; +error: + free(buf_copy); + return -1; +} + +/* + * Scan the content of the PCI bus, and the devices in the devices + * list + */ +int +rte_eal_pci_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + uint16_t domain; + uint8_t bus, devid, function; + + dir = opendir(SYSFS_PCI_DEVICES); + if (dir == NULL) { + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, + &bus, &devid, &function) != 0) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, + e->d_name); + if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +#ifdef RTE_PCI_CONFIG +/* + * It is deprecated, all its configurations have been moved into + * each PMD respectively. + */ +void +pci_config_space_set(__rte_unused struct rte_pci_device *dev) +{ + RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n"); +} +#endif + +/* Read PCI config space. */ +int rte_eal_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_read_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_read_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +/* Write PCI config space. */ +int rte_eal_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_write_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_write_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +#if defined(RTE_ARCH_X86) +static int +pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, + struct rte_pci_ioport *p) +{ + uint16_t start, end; + FILE *fp; + char *line = NULL; + char pci_id[16]; + int found = 0; + size_t linesz; + + snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + + fp = fopen("/proc/ioports", "r"); + if (fp == NULL) { + RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); + return -1; + } + + while (getdelim(&line, &linesz, '\n', fp) > 0) { + char *ptr = line; + char *left; + int n; + + n = strcspn(ptr, ":"); + ptr[n] = 0; + left = &ptr[n + 1]; + + while (*left && isspace(*left)) + left++; + + if (!strncmp(left, pci_id, strlen(pci_id))) { + found = 1; + + while (*ptr && isspace(*ptr)) + ptr++; + + sscanf(ptr, "%04hx-%04hx", &start, &end); + + break; + } + } + + free(line); + fclose(fp); + + if (!found) + return -1; + + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + p->base = start; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); + + return 0; +} +#endif + +int +rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_map(dev, bar, p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_map(dev, bar, p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#else + ret = pci_uio_ioport_map(dev, bar, p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#endif + break; + default: + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +void +rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_read(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_read(p, data, len, offset); +#endif + break; + default: + break; + } +} + +void +rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_write(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_write(p, data, len, offset); +#endif + break; + default: + break; + } +} + +int +rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_unmap(p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_unmap(p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = 0; +#else + ret = pci_uio_ioport_unmap(p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = 0; +#endif + break; + default: + break; + } + + return ret; +} + +/* Init the PCI EAL subsystem */ +int +rte_eal_pci_init(void) +{ + TAILQ_INIT(&pci_driver_list); + TAILQ_INIT(&pci_device_list); + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + + if (rte_eal_pci_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); + return -1; + } +#ifdef VFIO_PRESENT + pci_vfio_enable(); + + if (pci_vfio_is_enabled()) { + + /* if we are primary process, create a thread to communicate with + * secondary processes. the thread will use a socket to wait for + * requests from secondary process to send open file descriptors, + * because VFIO does not allow multiple open descriptors on a group or + * VFIO container. + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + pci_vfio_mp_sync_setup() < 0) + return -1; + } +#endif + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h new file mode 100644 index 00000000..7011753d --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -0,0 +1,127 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_PCI_INIT_H_ +#define EAL_PCI_INIT_H_ + +#include "eal_vfio.h" + +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +int pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_uio_ioport_unmap(struct rte_pci_ioport *p); + +#ifdef VFIO_PRESENT + +#define VFIO_MAX_GROUPS 64 + +int pci_vfio_enable(void); +int pci_vfio_is_enabled(void); +int pci_vfio_mp_sync_setup(void); + +/* access config space */ +int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); + +/* map VFIO resource prototype */ +int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_get_group_fd(int iommu_group_fd); +int pci_vfio_get_container_fd(void); + +/* + * Function prototypes for VFIO multiprocess sync functions + */ +int vfio_mp_sync_send_request(int socket, int req); +int vfio_mp_sync_receive_request(int socket); +int vfio_mp_sync_send_fd(int socket, int fd); +int vfio_mp_sync_receive_fd(int socket); +int vfio_mp_sync_connect_to_primary(void); + +/* socket comm protocol definitions */ +#define SOCKET_REQ_CONTAINER 0x100 +#define SOCKET_REQ_GROUP 0x200 +#define SOCKET_OK 0x0 +#define SOCKET_NO_FD 0x1 +#define SOCKET_ERR 0xFF + +/* + * we don't need to store device fd's anywhere since they can be obtained from + * the group fd via an ioctl() call. + */ +struct vfio_group { + int group_no; + int fd; +}; + +struct vfio_config { + int vfio_enabled; + int vfio_container_fd; + int vfio_container_has_dma; + int vfio_group_idx; + struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; +}; + +#endif + +#endif /* EAL_PCI_INIT_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c new file mode 100644 index 00000000..068694dc --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -0,0 +1,491 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(RTE_ARCH_X86) +#include +#endif + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" + +void *pci_map_addr = NULL; + +#define OFF_MAX ((uint64_t)(off_t)-1) + +int +pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offset) +{ + return pread(intr_handle->uio_cfg_fd, buf, len, offset); +} + +int +pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offset) +{ + return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); +} + +static int +pci_uio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot read command from PCI config space!\n"); + return -1; + } + + /* return if bus mastering is already on */ + if (reg & PCI_COMMAND_MASTER) + return 0; + + reg |= PCI_COMMAND_MASTER; + + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +static int +pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) +{ + FILE *f; + char filename[PATH_MAX]; + int ret; + unsigned major, minor; + dev_t dev; + + /* get the name of the sysfs file that contains the major and minor + * of the uio device and read its content */ + snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", + __func__); + return -1; + } + + ret = fscanf(f, "%u:%u", &major, &minor); + if (ret != 2) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", + __func__); + fclose(f); + return -1; + } + fclose(f); + + /* create the char device "mknod /dev/uioX c major minor" */ + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev = makedev(major, minor); + ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", + __func__, strerror(errno)); + return -1; + } + + return ret; +} + +/* + * Return the uioX char device used for a pci device. On success, return + * the UIO number and fill dstbuf string with the path of the device in + * sysfs. On error, return a negative value. In this case dstbuf is + * invalid. + */ +static int +pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, + unsigned int buflen, int create) +{ + struct rte_pci_addr *loc = &dev->addr; + unsigned int uio_num; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", + loc->domain, loc->bus, loc->devid, loc->function); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + longprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) + return -1; + + /* create uio device if we've been asked to */ + if (internal_config.create_uio_dev && create && + pci_mknod_uio_dev(dstbuf, uio_num) < 0) + RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); + + return uio_num; +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char dirname[PATH_MAX]; + char cfgname[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + int uio_num; + struct rte_pci_addr *loc; + + loc = &dev->addr; + + /* find uio resource */ + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); + if (uio_num < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + snprintf(cfgname, sizeof(cfgname), + "/sys/class/uio/uio%u/device/config", uio_num); + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); + if (dev->intr_handle.uio_cfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + cfgname, strerror(errno)); + goto error; + } + + if (dev->kdrv == RTE_KDRV_IGB_UIO) + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + else { + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* set bus master that is not done by uio_pci_generic */ + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + goto error; + } + } + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + void *mapaddr; + struct rte_pci_addr *loc; + struct pci_map *maps; + + loc = &dev->addr; + maps = uio_res->maps; + + /* update devname for mmap */ + snprintf(devname, sizeof(devname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, 0, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + pci_map_addr = RTE_PTR_ADD(mapaddr, + (size_t)dev->mem_resource[res_idx].len); + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = 0; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ +#if defined(RTE_ARCH_X86) + char dirname[PATH_MAX]; + char filename[PATH_MAX]; + int uio_num; + unsigned long start; + + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); + if (uio_num < 0) + return -1; + + /* get portio start */ + snprintf(filename, sizeof(filename), + "%s/portio/port%d/start", dirname, bar); + if (eal_parse_sysfs_value(filename, &start) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", + __func__); + return -1; + } + /* ensure we don't get anything funny here, read/write will cast to + * uin16_t */ + if (start > UINT16_MAX) + return -1; + + /* FIXME only for primary process ? */ + if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { + + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev->intr_handle.fd = open(filename, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + filename, strerror(errno)); + return -1; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + } + + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); + + p->base = start; + return 0; +#else + RTE_SET_USED(dev); + RTE_SET_USED(bar); + RTE_SET_USED(p); + return -1; +#endif +} + +void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl_p(*(const uint32_t *)s, reg); + } else if (len >= 2) { + size = 2; + outw_p(*(const uint16_t *)s, reg); + } else { + size = 1; + outb_p(*s, reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +int +pci_uio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); +#if defined(RTE_ARCH_X86) + /* FIXME close intr fd ? */ + return 0; +#else + return -1; +#endif +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c new file mode 100644 index 00000000..10266f8f --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -0,0 +1,1097 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_vfio.h" + +/** + * @file + * PCI probing under linux (VFIO version) + * + * This code tries to determine if the PCI device is bound to VFIO driver, + * and initialize it (map BARs, set up interrupts) if that's the case. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static struct rte_tailq_elem rte_vfio_tailq = { + .name = "VFIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_vfio_tailq) + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) +#define VFIO_GET_REGION_IDX(x) (x >> 40) + +/* per-process VFIO config */ +static struct vfio_config vfio_cfg; + +/* DMA mapping function prototype. + * Takes VFIO container fd as a parameter. + * Returns 0 on success, -1 on error. + * */ +typedef int (*vfio_dma_func_t)(int); + +struct vfio_iommu_type { + int type_id; + const char *name; + vfio_dma_func_t dma_map_func; +}; + +static int vfio_type1_dma_map(int); +static int vfio_noiommu_dma_map(int); + +/* IOMMU types we support */ +static const struct vfio_iommu_type iommu_types[] = { + /* x86 IOMMU, otherwise known as type 1 */ + { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* IOMMU-less mode */ + { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, +}; + +int +vfio_type1_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + } + + return 0; +} + +int +vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) +{ + /* No-IOMMU mode does not need DMA mapping */ + return 0; +} + +int +pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs) +{ + return pread64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +int +pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs) +{ + return pwrite64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +/* get PCI BAR number where MSI-X interrupts are */ +static int +pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, + uint32_t *msix_table_size) +{ + int ret; + uint32_t reg; + uint16_t flags; + uint8_t cap_id, cap_offset; + + /* read PCI capability pointer from config space */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_CAPABILITY_LIST); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_offset = reg & 0xFF; + + while (cap_offset) { + + /* read PCI capability ID */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_id = reg & 0xFF; + + /* if we haven't reached MSI-X, check next capability */ + if (cap_id != PCI_CAP_ID_MSIX) { + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need second byte */ + cap_offset = (reg & 0xFF00) >> 8; + + continue; + } + /* else, read table offset */ + else { + /* table offset resides in the next 4 bytes */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 4); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " + "space!\n"); + return -1; + } + + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + + *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; + *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); + + return 0; + } + } + return 0; +} + +/* set PCI bus mastering */ +static int +pci_vfio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); + return -1; + } + + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + + ret = pwrite64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */ +static const struct vfio_iommu_type * +pci_vfio_set_iommu_type(int vfio_container_fd) { + unsigned idx; + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { + const struct vfio_iommu_type *t = &iommu_types[idx]; + + int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, + t->type_id); + if (!ret) { + RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", + t->type_id, t->name); + return t; + } + /* not an error, there may be more supported IOMMU types */ + RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " + "error %i (%s)\n", t->type_id, t->name, errno, + strerror(errno)); + } + /* if we didn't find a suitable IOMMU type, fail */ + return NULL; +} + +/* check if we have any supported extensions */ +static int +pci_vfio_has_supported_extensions(int vfio_container_fd) { + int ret; + unsigned idx, n_extensions = 0; + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { + const struct vfio_iommu_type *t = &iommu_types[idx]; + + ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, + t->type_id); + if (ret < 0) { + RTE_LOG(ERR, EAL, " could not get IOMMU type, " + "error %i (%s)\n", errno, + strerror(errno)); + close(vfio_container_fd); + return -1; + } else if (ret == 1) { + /* we found a supported extension */ + n_extensions++; + } + RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", + t->type_id, t->name, + ret ? "supported" : "not supported"); + } + + /* if we didn't find any supported IOMMU types, fail */ + if (!n_extensions) { + close(vfio_container_fd); + return -1; + } + + return 0; +} + +/* set up interrupt support (but not enable interrupts) */ +static int +pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int i, ret, intr_idx; + + /* default to invalid index */ + intr_idx = VFIO_PCI_NUM_IRQS; + + /* get interrupt type from internal config (MSI-X by default, can be + * overriden from the command line + */ + switch (internal_config.vfio_intr_mode) { + case RTE_INTR_MODE_MSIX: + intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; + break; + case RTE_INTR_MODE_MSI: + intr_idx = VFIO_PCI_MSI_IRQ_INDEX; + break; + case RTE_INTR_MODE_LEGACY: + intr_idx = VFIO_PCI_INTX_IRQ_INDEX; + break; + /* don't do anything if we want to automatically determine interrupt type */ + case RTE_INTR_MODE_NONE: + break; + default: + RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); + return -1; + } + + /* start from MSI-X interrupt type */ + for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; + + /* skip interrupt modes we don't want */ + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && + i != intr_idx) + continue; + + irq.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot get IRQ info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* if this vector cannot be used with eventfd, fail if we explicitly + * specified interrupt type, otherwise continue */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } + + return 0; + } + + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; +} + +/* open container fd or get an existing one */ +int +pci_vfio_get_container_fd(void) +{ + int ret, vfio_container_fd; + + /* if we're in a primary process, try to open the container */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot open VFIO container, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* check VFIO API version */ + ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + if (ret < 0) + RTE_LOG(ERR, EAL, " could not get VFIO API version, " + "error %i (%s)\n", errno, strerror(errno)); + else + RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); + close(vfio_container_fd); + return -1; + } + + ret = pci_vfio_has_supported_extensions(vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " no supported IOMMU " + "extensions found!\n"); + return -1; + } + + return vfio_container_fd; + } else { + /* + * if we're in a secondary process, request container fd from the + * primary process via our socket + */ + int socket_fd; + + socket_fd = vfio_mp_sync_connect_to_primary(); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + close(socket_fd); + return vfio_container_fd; + } + + return -1; +} + +/* open group fd or get an existing one */ +int +pci_vfio_get_group_fd(int iommu_group_no) +{ + int i; + int vfio_group_fd; + char filename[PATH_MAX]; + + /* check if we already have the group descriptor open */ + for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) + return vfio_cfg.vfio_groups[i].fd; + + /* if primary, try to open the group */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try regular group format */ + snprintf(filename, sizeof(filename), + VFIO_GROUP_FMT, iommu_group_no); + vfio_group_fd = open(filename, O_RDWR); + if (vfio_group_fd < 0) { + /* if file not found, it's not an error */ + if (errno != ENOENT) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + return -1; + } + + /* special case: try no-IOMMU path as well */ + snprintf(filename, sizeof(filename), + VFIO_NOIOMMU_GROUP_FMT, iommu_group_no); + vfio_group_fd = open(filename, O_RDWR); + if (vfio_group_fd < 0) { + if (errno != ENOENT) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + return -1; + } + return 0; + } + /* noiommu group found */ + } + + /* if the fd is valid, create a new group for it */ + if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + close(vfio_group_fd); + return -1; + } + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + return vfio_group_fd; + } + /* if we're in a secondary process, request group fd from the primary + * process via our socket + */ + else { + int socket_fd, ret; + + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { + RTE_LOG(ERR, EAL, " cannot send group number!\n"); + close(socket_fd); + return -1; + } + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + close(socket_fd); + return 0; + case SOCKET_OK: + vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); + /* if we got the fd, return it */ + if (vfio_group_fd > 0) { + close(socket_fd); + return vfio_group_fd; + } + /* fall-through on error */ + default: + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + } + return -1; +} + +/* parse IOMMU group number for a PCI device + * returns 1 on success, -1 for errors, 0 for non-existent group + */ +static int +pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) +{ + char linkname[PATH_MAX]; + char filename[PATH_MAX]; + char *tok[16], *group_tok, *end; + int ret; + + memset(linkname, 0, sizeof(linkname)); + memset(filename, 0, sizeof(filename)); + + /* try to find out IOMMU group for this device */ + snprintf(linkname, sizeof(linkname), + SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); + + ret = readlink(linkname, filename, sizeof(filename)); + + /* if the link doesn't exist, no VFIO for us */ + if (ret < 0) + return 0; + + ret = rte_strsplit(filename, sizeof(filename), + tok, RTE_DIM(tok), '/'); + + if (ret <= 0) { + RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr); + return -1; + } + + /* IOMMU group is always the last token */ + errno = 0; + group_tok = tok[ret - 1]; + end = group_tok; + *iommu_group_no = strtol(group_tok, &end, 10); + if ((end != group_tok && *end != '\0') || errno != 0) { + RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr); + return -1; + } + + return 1; +} + +static void +clear_current_group(void) +{ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; +} + + +/* + * map the PCI resources of a PCI device in virtual memory (VFIO version). + * primary and secondary processes follow almost exactly the same path + */ +int +pci_vfio_map_resource(struct rte_pci_device *dev) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + int vfio_group_fd, vfio_dev_fd; + int iommu_group_no; + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret, msix_bar; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + uint32_t msix_table_offset = 0; + uint32_t msix_table_size = 0; + uint32_t ioport_bar; + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + /* get group number */ + ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no); + if (ret == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* if negative, something failed */ + if (ret < 0) + return -1; + + /* get the actual group fd */ + vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd < 0) + return -1; + + /* store group fd */ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + + /* if group_fd == 0, that means the device isn't managed by VFIO */ + if (vfio_group_fd == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + /* we store 0 as group fd to distinguish between existing but + * unbound VFIO groups, and groups that don't exist at all. + */ + vfio_cfg.vfio_group_idx++; + return 1; + } + + /* + * at this point, we know at least one port on this device is bound to VFIO, + * so we can proceed to try and set this particular port up + */ + + /* check if the group is viable */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get group status, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + + /* + * at this point, we know that this group is viable (meaning, all devices + * are either bound to VFIO or not bound to anything) + */ + + /* check if group does not have a container yet */ + if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { + + /* add group to a container */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, + &vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + /* + * at this point we know that this group has been successfully + * initialized, so we increment vfio_group_idx to indicate that we can + * add new groups. + */ + vfio_cfg.vfio_group_idx++; + } + + /* + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when at least one group is assigned to + * a container and only in primary process + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_container_has_dma == 0) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = + pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd); + if (!t) { + RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", pci_addr); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s DMA remapping failed, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + return -1; + } + vfio_cfg.vfio_container_has_dma = 1; + } + + /* get a file descriptor for the device */ + vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr); + if (vfio_dev_fd < 0) { + /* if we cannot get a device fd, this simply means that this + * particular port is not bound to VFIO + */ + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* test and setup the device */ + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device info, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + return -1; + } + + /* get MSI-X BAR, if any (we have to know where it is because we can't + * easily mmap it when using VFIO) */ + msix_bar = -1; + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, + &msix_table_offset, &msix_table_size); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); + close(vfio_dev_fd); + return -1; + } + + /* if we're in a primary process, allocate vfio_res and get region info */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + close(vfio_dev_fd); + return -1; + } + memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); + + /* get number of registers (up to BAR5) */ + vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, + VFIO_PCI_BAR5_REGION_INDEX + 1); + } else { + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + close(vfio_dev_fd); + return -1; + } + } + + /* map BARs */ + maps = vfio_res->maps; + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + struct vfio_region_info reg = { .argsz = sizeof(reg) }; + void *bar_addr; + struct memreg { + unsigned long offset, size; + } memreg[2] = {}; + + reg.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); + + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device region info " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + /* chk for io port region */ + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_BASE_ADDRESS_0 + i*4); + + if (ret != sizeof(ioport_bar)) { + RTE_LOG(ERR, EAL, + "Cannot read command (%x) from config space!\n", + PCI_BASE_ADDRESS_0 + i*4); + return -1; + } + + if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) { + RTE_LOG(INFO, EAL, + "Ignore mapping IO port bar(%d) addr: %x\n", + i, ioport_bar); + continue; + } + + /* skip non-mmapable BARs */ + if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) + continue; + + if (i == msix_bar) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table_offset; + uint32_t table_end = table_start + msix_table_size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= reg.size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); + continue; + } else { + memreg[0].offset = reg.offset; + memreg[0].size = table_start; + memreg[1].offset = table_end; + memreg[1].size = reg.size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR %d that contains the MSI-X " + "table. Trying offsets: " + "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } + } else { + memreg[0].offset = reg.offset; + memreg[0].size = reg.size; + } + + /* try to figure out an address */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_addr; + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + } else { + bar_addr = maps[i].addr; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset); + map_addr = pci_map_resource(second_addr, + vfio_dev_fd, memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, reg.size); + bar_addr = MAP_FAILED; + } + } + + if (bar_addr == MAP_FAILED || + (internal_config.process_type == RTE_PROC_SECONDARY && + bar_addr != maps[i].addr)) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, + strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + maps[i].addr = bar_addr; + maps[i].offset = reg.offset; + maps[i].size = reg.size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + dev->mem_resource[i].addr = bar_addr; + } + + /* if secondary process, do not set up interrupts */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd)) { + RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* Reset the device */ + ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); + } + + if (internal_config.process_type == RTE_PROC_PRIMARY) + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +} + +int +pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + if (bar < VFIO_PCI_BAR0_REGION_INDEX || + bar > VFIO_PCI_BAR5_REGION_INDEX) { + RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); + return -1; + } + + p->dev = dev; + p->base = VFIO_GET_REGION_ADDR(bar); + return 0; +} + +void +pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pread64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +void +pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pwrite64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +int +pci_vfio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); + return -1; +} + +int +pci_vfio_enable(void) +{ + /* initialize group list */ + int i; + int vfio_available; + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].group_no = -1; + } + + /* inform the user that we are probing for VFIO */ + RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); + + /* check if vfio-pci module is loaded */ + vfio_available = rte_eal_check_module("vfio_pci"); + + /* return error directly */ + if (vfio_available == -1) { + RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); + return -1; + } + + /* return 0 if VFIO modules not loaded */ + if (vfio_available == 0) { + RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, " + "skipping VFIO support...\n"); + return 0; + } + + vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd(); + + /* check if we have VFIO driver enabled */ + if (vfio_cfg.vfio_container_fd != -1) { + RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); + vfio_cfg.vfio_enabled = 1; + } else { + RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); + } + + return 0; +} + +int +pci_vfio_is_enabled(void) +{ + return vfio_cfg.vfio_enabled; +} +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c new file mode 100644 index 00000000..d9188fde --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ +#ifdef __USE_MISC +#define REMOVED_USE_MISC +#undef __USE_MISC +#endif +#include +/* make sure we redefine __USE_MISC only if it was previously undefined */ +#ifdef REMOVED_USE_MISC +#define __USE_MISC +#undef REMOVED_USE_MISC +#endif + +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_thread.h" + +/** + * @file + * VFIO socket for communication between primary and secondary processes. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define SOCKET_PATH_FMT "%s/.%s_mp_socket" +#define CMSGLEN (CMSG_LEN(sizeof(int))) +#define FD_TO_CMSGHDR(fd, chdr) \ + do {\ + (chdr).cmsg_len = CMSGLEN;\ + (chdr).cmsg_level = SOL_SOCKET;\ + (chdr).cmsg_type = SCM_RIGHTS;\ + memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ + } while (0) +#define CMSGHDR_TO_FD(chdr, fd) \ + memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) + +static pthread_t socket_thread; +static int mp_socket_fd; + + +/* get socket path (/var/run if root, $HOME otherwise) */ +static void +get_socket_path(char *buffer, int bufsz) +{ + const char *dir = "/var/run"; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + dir = home_dir; + + /* use current prefix as file path */ + snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, + internal_config.hugefile_prefix); +} + + + +/* + * data flow for socket comm protocol: + * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP + * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number + * 2. server receives message + * 2a. in case of invalid group, SOCKET_ERR is sent back to client + * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client + * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd + * + * in case of any error, socket is closed. + */ + +/* send a request, return -1 on error */ +int +vfio_mp_sync_send_request(int socket, int req) +{ + struct msghdr hdr; + struct iovec iov; + int buf; + int ret; + + memset(&hdr, 0, sizeof(hdr)); + + buf = req; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive a request and return it */ +int +vfio_mp_sync_receive_request(int socket) +{ + int buf; + struct msghdr hdr; + struct iovec iov; + int ret, req; + + memset(&hdr, 0, sizeof(hdr)); + + buf = SOCKET_ERR; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = recvmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + + req = buf; + + return req; +} + +/* send OK in message, fd in control message */ +int +vfio_mp_sync_send_fd(int socket, int fd) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + buf = SOCKET_OK; + FD_TO_CMSGHDR(fd, *chdr); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive OK in message, fd in control message */ +int +vfio_mp_sync_receive_fd(int socket) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret, req, fd; + + buf = SOCKET_ERR; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + ret = recvmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + + req = buf; + + if (req != SOCKET_OK) + return -1; + + CMSGHDR_TO_FD(*chdr, fd); + + return fd; +} + +/* connect socket_fd in secondary process to the primary process's socket */ +int +vfio_mp_sync_connect_to_primary(void) +{ + struct sockaddr_un addr; + socklen_t sockaddr_len; + int socket_fd; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) + return socket_fd; + + /* if connect failed */ + close(socket_fd); + return -1; +} + + + +/* + * socket listening thread for primary process + */ +static __attribute__((noreturn)) void * +pci_vfio_mp_sync_thread(void __rte_unused * arg) +{ + int ret, fd, vfio_group_no; + + /* wait for requests on the socket */ + for (;;) { + int conn_sock; + struct sockaddr_un addr; + socklen_t sockaddr_len = sizeof(addr); + + /* this is a blocking call */ + conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, + &sockaddr_len); + + /* just restart on error */ + if (conn_sock == -1) + continue; + + /* set socket to linger after close */ + struct linger l; + l.l_onoff = 1; + l.l_linger = 60; + setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); + + ret = vfio_mp_sync_receive_request(conn_sock); + + switch (ret) { + case SOCKET_REQ_CONTAINER: + fd = pci_vfio_get_container_fd(); + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + else + vfio_mp_sync_send_fd(conn_sock, fd); + break; + case SOCKET_REQ_GROUP: + /* wait for group number */ + vfio_group_no = vfio_mp_sync_receive_request(conn_sock); + if (vfio_group_no < 0) { + close(conn_sock); + continue; + } + + fd = pci_vfio_get_group_fd(vfio_group_no); + + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + /* if VFIO group exists but isn't bound to VFIO driver */ + else if (fd == 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + /* if group exists and is bound to VFIO driver */ + else { + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + vfio_mp_sync_send_fd(conn_sock, fd); + } + break; + default: + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + break; + } + close(conn_sock); + } +} + +static int +vfio_mp_sync_socket_setup(void) +{ + int ret, socket_fd; + struct sockaddr_un addr; + socklen_t sockaddr_len; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + unlink(addr.sun_path); + + ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + ret = listen(socket_fd, 50); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + /* save the socket in local configuration */ + mp_socket_fd = socket_fd; + + return 0; +} + +/* + * set up a local socket and tell it to listen for incoming connections + */ +int +pci_vfio_mp_sync_setup(void) +{ + int ret; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (vfio_mp_sync_socket_setup() < 0) { + RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); + return -1; + } + + ret = pthread_create(&socket_thread, NULL, + pci_vfio_mp_sync_thread, NULL); + if (ret) { + RTE_LOG(ERR, EAL, + "Failed to create thread for communication with secondary processes!\n"); + close(mp_socket_fd); + return -1; + } + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); + ret = rte_thread_setname(socket_thread, thread_name); + if (ret) + RTE_LOG(ERR, EAL, + "Failed to set thread name for secondary processes!\n"); + + return 0; +} + +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c new file mode 100644 index 00000000..18bd8e04 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -0,0 +1,199 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; +RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; +RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); + +/* + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switch in FINISHED state. + */ +int +rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id) +{ + int n; + char c = 0; + int m2s = lcore_config[slave_id].pipe_master2slave[1]; + int s2m = lcore_config[slave_id].pipe_slave2master[0]; + + if (lcore_config[slave_id].state != WAIT) + return -EBUSY; + + lcore_config[slave_id].f = f; + lcore_config[slave_id].arg = arg; + + /* send message */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(m2s, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + /* wait ack */ + do { + n = read(s2m, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + return 0; +} + +/* set affinity for current EAL thread */ +static int +eal_thread_set_affinity(void) +{ + unsigned lcore_id = rte_lcore_id(); + + /* acquire system unique id */ + rte_gettid(); + + /* update EAL thread core affinity */ + return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset); +} + +void eal_thread_init_master(unsigned lcore_id) +{ + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); +} + +/* main loop of threads */ +__attribute__((noreturn)) void * +eal_thread_loop(__attribute__((unused)) void *arg) +{ + char c; + int n, ret; + unsigned lcore_id; + pthread_t thread_id; + int m2s, s2m; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + + thread_id = pthread_self(); + + /* retrieve our lcore_id from the configuration structure */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (thread_id == lcore_config[lcore_id].thread_id) + break; + } + if (lcore_id == RTE_MAX_LCORE) + rte_panic("cannot retrieve lcore id\n"); + + m2s = lcore_config[lcore_id].pipe_master2slave[0]; + s2m = lcore_config[lcore_id].pipe_slave2master[1]; + + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "..."); + + /* read on our pipe to get commands */ + while (1) { + void *fct_arg; + + /* wait command */ + do { + n = read(m2s, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + lcore_config[lcore_id].state = RUNNING; + + /* send ack */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(s2m, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + if (lcore_config[lcore_id].f == NULL) + rte_panic("NULL function pointer\n"); + + /* call the function and store the return value */ + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; + rte_wmb(); + lcore_config[lcore_id].state = FINISHED; + } + + /* never reached */ + /* pthread_exit(NULL); */ + /* return NULL; */ +} + +/* require calling thread tid by gettid() */ +int rte_sys_gettid(void) +{ + return (int)syscall(SYS_gettid); +} diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c new file mode 100644 index 00000000..f2abb7b6 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2012-2013 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +enum timer_source eal_timer_source = EAL_TIMER_HPET; + +#ifdef RTE_LIBEAL_USE_HPET + +#define DEV_HPET "/dev/hpet" + +/* Maximum number of counters. */ +#define HPET_TIMER_NUM 3 + +/* General capabilities register */ +#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */ +#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */ + +/** + * HPET timer registers. From the Intel IA-PC HPET (High Precision Event + * Timers) Specification. + */ +struct eal_hpet_regs { + /* Memory-mapped, software visible registers */ + uint64_t capabilities; /**< RO General Capabilities Register. */ + uint64_t reserved0; /**< Reserved for future use. */ + uint64_t config; /**< RW General Configuration Register. */ + uint64_t reserved1; /**< Reserved for future use. */ + uint64_t isr; /**< RW Clear General Interrupt Status. */ + uint64_t reserved2[25]; /**< Reserved for future use. */ + union { + uint64_t counter; /**< RW Main Counter Value Register. */ + struct { + uint32_t counter_l; /**< RW Main Counter Low. */ + uint32_t counter_h; /**< RW Main Counter High. */ + }; + }; + uint64_t reserved3; /**< Reserved for future use. */ + struct { + uint64_t config; /**< RW Timer Config and Capability Reg. */ + uint64_t comp; /**< RW Timer Comparator Value Register. */ + uint64_t fsb; /**< RW FSB Interrupt Route Register. */ + uint64_t reserved4; /**< Reserved for future use. */ + } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */ +}; + +/* Mmap'd hpet registers */ +static volatile struct eal_hpet_regs *eal_hpet = NULL; + +/* Period at which the HPET counter increments in + * femtoseconds (10^-15 seconds). */ +static uint32_t eal_hpet_resolution_fs = 0; + +/* Frequency of the HPET counter in Hz */ +static uint64_t eal_hpet_resolution_hz = 0; + +/* Incremented 4 times during one 32bits hpet full count */ +static uint32_t eal_hpet_msb; + +static pthread_t msb_inc_thread_id; + +/* + * This function runs on a specific thread to update a global variable + * containing used to process MSB of the HPET (unfortunatelly, we need + * this because hpet is 32 bits by default under linux). + */ +static void +hpet_msb_inc(__attribute__((unused)) void *arg) +{ + uint32_t t; + + while (1) { + t = (eal_hpet->counter_l >> 30); + if (t != (eal_hpet_msb & 3)) + eal_hpet_msb ++; + sleep(10); + } +} + +uint64_t +rte_get_hpet_hz(void) +{ + if(internal_config.no_hpet) + rte_panic("Error, HPET called, but no HPET present\n"); + + return eal_hpet_resolution_hz; +} + +uint64_t +rte_get_hpet_cycles(void) +{ + uint32_t t, msb; + uint64_t ret; + + if(internal_config.no_hpet) + rte_panic("Error, HPET called, but no HPET present\n"); + + t = eal_hpet->counter_l; + msb = eal_hpet_msb; + ret = (msb + 2 - (t >> 30)) / 4; + ret <<= 32; + ret += t; + return ret; +} + +#endif + +#ifdef RTE_LIBEAL_USE_HPET +/* + * Open and mmap /dev/hpet (high precision event timer) that will + * provide our time reference. + */ +int +rte_eal_hpet_init(int make_default) +{ + int fd, ret; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (internal_config.no_hpet) { + RTE_LOG(NOTICE, EAL, "HPET is disabled\n"); + return -1; + } + + fd = open(DEV_HPET, O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n", + strerror(errno)); + internal_config.no_hpet = 1; + return -1; + } + eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0); + if (eal_hpet == MAP_FAILED) { + RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n" + "Please enable CONFIG_HPET_MMAP in your kernel configuration " + "to allow HPET support.\n" + "To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n " + "in your build configuration or use '--no-hpet' EAL flag.\n"); + close(fd); + internal_config.no_hpet = 1; + return -1; + } + close(fd); + + eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities & + CLK_PERIOD_MASK) >> + CLK_PERIOD_SHIFT); + + eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) / + (uint64_t)eal_hpet_resolution_fs; + + RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n", + eal_hpet_resolution_hz/1000); + + eal_hpet_msb = (eal_hpet->counter_l >> 30); + + /* create a thread that will increment a global variable for + * msb (hpet is 32 bits by default under linux) */ + ret = pthread_create(&msb_inc_thread_id, NULL, + (void *(*)(void *))hpet_msb_inc, NULL); + if (ret != 0) { + RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n"); + internal_config.no_hpet = 1; + return -1; + } + + /* + * Set thread_name for aid in debugging. + */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); + ret = rte_thread_setname(msb_inc_thread_id, thread_name); + if (ret != 0) + RTE_LOG(ERR, EAL, + "ERROR: Cannot set HPET timer thread name!\n"); + + if (make_default) + eal_timer_source = EAL_TIMER_HPET; + return 0; +} +#endif + +static void +check_tsc_flags(void) +{ + char line[512]; + FILE *stream; + + stream = fopen("/proc/cpuinfo", "r"); + if (!stream) { + RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n"); + return; + } + + while (fgets(line, sizeof line, stream)) { + char *constant_tsc; + char *nonstop_tsc; + + if (strncmp(line, "flags", 5) != 0) + continue; + + constant_tsc = strstr(line, "constant_tsc"); + nonstop_tsc = strstr(line, "nonstop_tsc"); + if (!constant_tsc || !nonstop_tsc) + RTE_LOG(WARNING, EAL, + "WARNING: cpu flags " + "constant_tsc=%s " + "nonstop_tsc=%s " + "-> using unreliable clock cycles !\n", + constant_tsc ? "yes":"no", + nonstop_tsc ? "yes":"no"); + break; + } + + fclose(stream); +} + +uint64_t +get_tsc_freq(void) +{ +#ifdef CLOCK_MONOTONIC_RAW +#define NS_PER_SEC 1E9 + + struct timespec sleeptime = {.tv_nsec = NS_PER_SEC / 10 }; /* 1/10 second */ + + struct timespec t_start, t_end; + uint64_t tsc_hz; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) { + uint64_t ns, end, start = rte_rdtsc(); + nanosleep(&sleeptime,NULL); + clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); + end = rte_rdtsc(); + ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC); + ns += (t_end.tv_nsec - t_start.tv_nsec); + + double secs = (double)ns/NS_PER_SEC; + tsc_hz = (uint64_t)((end - start)/secs); + return tsc_hz; + } +#endif + return 0; +} + +int +rte_eal_timer_init(void) +{ + + eal_timer_source = EAL_TIMER_TSC; + + set_tsc_freq(); + check_tsc_flags(); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h new file mode 100644 index 00000000..f483bf40 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_VFIO_H_ +#define EAL_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#ifdef RTE_EAL_VFIO +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#else +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE +#endif + +#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#define RTE_VFIO_NOIOMMU 8 +#else +#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU +#endif + +#define VFIO_PRESENT +#endif /* kernel version */ +#endif /* RTE_EAL_VFIO */ + +#endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c new file mode 100644 index 00000000..495eef9e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -0,0 +1,369 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include + +#define PAGE_SIZE RTE_PGSIZE_4K +#define DEFAUL_DOM0_NAME "dom0-mem" + +static int xen_fd = -1; +static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; + +/* + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by mem_size until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of mem_size size. + */ +static void * +xen_get_virtual_area(size_t *size, size_t mem_size) +{ + void *addr; + int fd; + long aligned_addr; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0){ + RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); + return NULL; + } + do { + addr = mmap(NULL, (*size) + mem_size, PROT_READ, + MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + *size -= mem_size; + } while (addr == MAP_FAILED && *size > 0); + + if (addr == MAP_FAILED) { + close(fd); + RTE_LOG(ERR, EAL, "Cannot get a virtual area\n"); + return NULL; + } + + munmap(addr, (*size) + mem_size); + close(fd); + + /* align addr to a mem_size boundary */ + aligned_addr = (uintptr_t)addr; + aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); + addr = (void *)(aligned_addr); + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + addr, *size); + + return addr; +} + +/** + * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm + * /memsize-mB/memsize file, and the size unit is mB. + */ +static int +get_xen_memory_size(void) +{ + char path[PATH_MAX]; + unsigned long mem_size = 0; + static const char *file_name; + + file_name = "memsize"; + snprintf(path, sizeof(path), "%s/%s", + sys_dir_path, file_name); + + if (eal_parse_sysfs_value(path, &mem_size) < 0) + return -1; + + if (mem_size == 0) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not" + " configured.\n",sys_dir_path, file_name); + if (mem_size % 2) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be" + " even number.\n",sys_dir_path, file_name); + + if (mem_size > DOM0_CONFIG_MEMSIZE) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger" + " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE); + + return mem_size; +} + +/** + * Based on physical address to caculate MFN in Xen Dom0. + */ +phys_addr_t +rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +{ + int mfn_id; + uint64_t mfn, mfn_offset; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg *memseg = mcfg->memseg; + + mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; + + /*the MFN is contiguous in 2M */ + mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) % + RTE_PGSIZE_2M / PAGE_SIZE; + mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id]; + + /** return mechine address */ + return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE; +} + +int +rte_xen_dom0_memory_init(void) +{ + void *vir_addr, *vma_addr = NULL; + int err, ret = 0; + uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0; + size_t vma_len = 0; + struct memory_info meminfo; + struct memseg_info seginfo[RTE_MAX_MEMSEG]; + int flags, page_size = getpagesize(); + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg *memseg = mcfg->memseg; + uint64_t total_mem = internal_config.memory; + + memset(seginfo, 0, sizeof(seginfo)); + memset(&meminfo, 0, sizeof(struct memory_info)); + + mem_size = get_xen_memory_size(); + requested = (unsigned) (total_mem / 0x100000); + if (requested > mem_size) + /* if we didn't satisfy total memory requirements */ + rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB," + " available: %uMB\n", requested, mem_size); + else if (total_mem != 0) + mem_size = requested; + + /* Check FD and open once */ + if (xen_fd < 0) { + xen_fd = open(DOM0_MM_DEV, O_RDWR); + if (xen_fd < 0) { + RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); + return -1; + } + } + + meminfo.size = mem_size; + + /* construct memory mangement name for Dom0 */ + snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s", + internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); + + /* Notify kernel driver to allocate memory */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n"); + err = -EIO; + goto fail; + } + + /* Get number of memory segment from driver */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n"); + err = -EIO; + goto fail; + } + + if(num_memseg > RTE_MAX_MEMSEG){ + RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater" + " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG); + err = -EIO; + goto fail; + } + + /* get all memory segements information */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n"); + err = -EIO; + goto fail; + } + + /* map all memory segments to contiguous user space */ + for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) + { + vma_len = seginfo[memseg_idx].size; + + /** + * get the biggest virtual memory area up to vma_len. If it fails, + * vma_addr is NULL, so let the kernel provide the address. + */ + vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M); + if (vma_addr == NULL) { + flags = MAP_SHARED; + vma_len = RTE_PGSIZE_2M; + } else + flags = MAP_SHARED | MAP_FIXED; + + seginfo[memseg_idx].size = vma_len; + vir_addr = mmap(vma_addr, seginfo[memseg_idx].size, + PROT_READ|PROT_WRITE, flags, xen_fd, + memseg_idx * page_size); + if (vir_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n", + DOM0_MM_DEV); + err = -EIO; + goto fail; + } + + memseg[memseg_idx].addr = vir_addr; + memseg[memseg_idx].phys_addr = page_size * + seginfo[memseg_idx].pfn ; + memseg[memseg_idx].len = seginfo[memseg_idx].size; + for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++) + memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i]; + + /* MFNs are continuous in 2M, so assume that page size is 2M */ + memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M; + + memseg[memseg_idx].nchannel = mcfg->nchannel; + memseg[memseg_idx].nrank = mcfg->nrank; + + /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/ + memseg[memseg_idx].socket_id = 0; + } + + return 0; +fail: + if (xen_fd > 0) { + close(xen_fd); + xen_fd = -1; + } + return err; +} + +/* + * This creates the memory mappings in the secondary process to match that of + * the server process. It goes through each memory segment in the DPDK runtime + * configuration, mapping them in order to form a contiguous block in the + * virtual memory space + */ +int +rte_xen_dom0_memory_attach(void) +{ + const struct rte_mem_config *mcfg; + unsigned s = 0; /* s used to track the segment number */ + int xen_fd = -1; + int ret = -1; + void *vir_addr; + char name[DOM0_NAME_MAX] = {0}; + int page_size = getpagesize(); + + mcfg = rte_eal_get_configuration()->mem_config; + + /* Check FD and open once */ + if (xen_fd < 0) { + xen_fd = open(DOM0_MM_DEV, O_RDWR); + if (xen_fd < 0) { + RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); + goto error; + } + } + + /* construct memory mangement name for Dom0 */ + snprintf(name, DOM0_NAME_MAX, "%s-%s", + internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); + /* attach to memory segments of primary process */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); + if (ret) { + RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + + vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, + s * page_size); + if (vir_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in %s to requested address [%p]\n", + (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, + mcfg->memseg[s].addr); + goto error; + } + } + return 0; + +error: + if (xen_fd >= 0) { + close(xen_fd); + xen_fd = -1; + } + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 00000000..d9707780 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,108 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. + */ +struct memblock_info { + uint8_t exchange_flag; + uint8_t used; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h new file mode 100644 index 00000000..3dacbff8 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -0,0 +1,228 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic " +#endif + +#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ +#define _RTE_LINUXAPP_INTERRUPTS_H_ + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ + RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ + RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ + RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_MAX +}; + +#define RTE_INTR_EVENT_ADD 1UL +#define RTE_INTR_EVENT_DEL 2UL + +typedef void (*rte_intr_event_cb_t)(int fd, void *arg); + +struct rte_epoll_data { + uint32_t event; /**< event type */ + void *data; /**< User data */ + rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ + void *cb_arg; /**< IN: callback arg */ +}; + +enum { + RTE_EPOLL_INVALID = 0, + RTE_EPOLL_VALID, + RTE_EPOLL_EXEC, +}; + +/** interrupt epoll event obj, taken by epoll_event.ptr */ +struct rte_epoll_event { + volatile uint32_t status; /**< OUT: event status */ + int fd; /**< OUT: event fd */ + int epfd; /**< OUT: epoll instance the ev associated with */ + struct rte_epoll_data epdata; +}; + +/** Handle for interrupts. */ +struct rte_intr_handle { + union { + int vfio_dev_fd; /**< VFIO device file descriptor */ + int uio_cfg_fd; /**< UIO config file descriptor + for uio_pci_generic */ + }; + int fd; /**< interrupt event file descriptor */ + enum rte_intr_handle_type type; /**< handle type */ + uint32_t max_intr; /**< max interrupt requested */ + uint32_t nb_efd; /**< number of available efd(event fd) */ + int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ + struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; + /**< intr vector epoll event */ + int *intr_vec; /**< intr vector number array */ +}; + +#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ + +/** + * It waits for events on the epoll instance. + * + * @param epfd + * Epoll instance fd on which the caller wait for events. + * @param events + * Memory area contains the events that will be available for the caller. + * @param maxevents + * Up to maxevents are returned, must greater than zero. + * @param timeout + * Specifying a timeout of -1 causes a block indefinitely. + * Specifying a timeout equal to zero cause to return immediately. + * @return + * - On success, returns the number of available event. + * - On failure, a negative value. + */ +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout); + +/** + * It performs control operations on epoll instance referred by the epfd. + * It requests that the operation op be performed for the target fd. + * + * @param epfd + * Epoll instance fd on which the caller perform control operations. + * @param op + * The operation be performed for the target fd. + * @param fd + * The target fd on which the control ops perform. + * @param event + * Describes the object linked to the fd. + * Note: The caller must take care the object deletion after CTL_DEL. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event); + +/** + * The function returns the per thread epoll instance. + * + * @return + * epfd the epoll instance referred to. + */ +int +rte_intr_tls_epfd(void); + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It enables the packet I/O interrupt event if it's necessary. + * It creates event fd for each interrupt vector when MSIX is used, + * otherwise it multiplexes a single event fd. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disables the packet I/O interrupt event. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The packet I/O interrupt on datapath is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other causes or not. + * Other causes stand for any none packet I/O interrupts. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if no multiple interrupt vector support. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h new file mode 100644 index 00000000..7e5e5984 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -0,0 +1,172 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_KNI_COMMON_H_ +#define _RTE_KNI_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +/** + * KNI name is part of memzone name. + */ +#define RTE_KNI_NAMESIZE 32 + +#define RTE_CACHE_LINE_MIN_SIZE 64 + +/* + * Request id. + */ +enum rte_kni_req_id { + RTE_KNI_REQ_UNKNOWN = 0, + RTE_KNI_REQ_CHANGE_MTU, + RTE_KNI_REQ_CFG_NETWORK_IF, + RTE_KNI_REQ_MAX, +}; + +/* + * Structure for KNI request. + */ +struct rte_kni_request { + uint32_t req_id; /**< Request id */ + union { + uint32_t new_mtu; /**< New MTU */ + uint8_t if_up; /**< 1: interface up, 0: interface down */ + }; + int32_t result; /**< Result for processing request */ +} __attribute__((__packed__)); + +/* + * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO + * Write and read should wrap around. Fifo is empty when write == read + * Writing should never overwrite the read position + */ +struct rte_kni_fifo { + volatile unsigned write; /**< Next position to be written*/ + volatile unsigned read; /**< Next position to be read */ + unsigned len; /**< Circular buffer length */ + unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ + void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ +}; + +/* + * The kernel image of the rte_mbuf struct, with only the relevant fields. + * Padding is necessary to assure the offsets of these fields + */ +struct rte_kni_mbuf { + void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); + char pad0[10]; + uint16_t data_off; /**< Start address of data in segment buffer. */ + char pad1[4]; + uint64_t ol_flags; /**< Offload features. */ + char pad2[4]; + uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + + /* fields on second cache line */ + char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE))); + void *pool; + void *next; +}; + +/* + * Struct used to create a KNI device. Passed to the kernel in IOCTL call + */ + +struct rte_kni_device_info { + char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */ + + phys_addr_t tx_phys; + phys_addr_t rx_phys; + phys_addr_t alloc_phys; + phys_addr_t free_phys; + + /* Used by Ethtool */ + phys_addr_t req_phys; + phys_addr_t resp_phys; + phys_addr_t sync_phys; + void * sync_va; + + /* mbuf mempool */ + void * mbuf_va; + phys_addr_t mbuf_phys; + + /* PCI info */ + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ + + uint16_t group_id; /**< Group ID */ + uint32_t core_id; /**< core ID to bind for kernel thread */ + + uint8_t force_bind : 1; /**< Flag for kernel thread binding */ + + /* mbuf size */ + unsigned mbuf_size; +}; + +#define KNI_DEVICE "kni" + +#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int) +#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info) +#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info) + +#endif /* _RTE_KNI_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map new file mode 100644 index 00000000..12503efa --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -0,0 +1,156 @@ +DPDK_2.0 { + global: + + __rte_panic; + devargs_list; + eal_parse_sysfs_value; + eal_timer_source; + lcore_config; + pci_device_list; + pci_driver_list; + per_lcore__lcore_id; + per_lcore__rte_errno; + rte_calloc; + rte_calloc_socket; + rte_cpu_check_supported; + rte_cpu_get_flag_enabled; + rte_cycles_vmware_tsc_map; + rte_delay_us; + rte_dump_physmem_layout; + rte_dump_registers; + rte_dump_stack; + rte_dump_tailq; + rte_eal_alarm_cancel; + rte_eal_alarm_set; + rte_eal_dev_init; + rte_eal_devargs_add; + rte_eal_devargs_dump; + rte_eal_devargs_type_count; + rte_eal_driver_register; + rte_eal_driver_unregister; + rte_eal_get_configuration; + rte_eal_get_lcore_state; + rte_eal_get_physmem_layout; + rte_eal_get_physmem_size; + rte_eal_has_hugepages; + rte_eal_hpet_init; + rte_eal_init; + rte_eal_iopl_init; + rte_eal_lcore_role; + rte_eal_mp_remote_launch; + rte_eal_mp_wait_lcore; + rte_eal_parse_devargs_str; + rte_eal_pci_dump; + rte_eal_pci_probe; + rte_eal_pci_probe_one; + rte_eal_pci_register; + rte_eal_pci_scan; + rte_eal_pci_unregister; + rte_eal_process_type; + rte_eal_remote_launch; + rte_eal_tailq_lookup; + rte_eal_tailq_register; + rte_eal_vdev_init; + rte_eal_vdev_uninit; + rte_eal_wait_lcore; + rte_exit; + rte_free; + rte_get_hpet_cycles; + rte_get_hpet_hz; + rte_get_log_level; + rte_get_log_type; + rte_get_tsc_hz; + rte_hexdump; + rte_intr_callback_register; + rte_intr_callback_unregister; + rte_intr_disable; + rte_intr_enable; + rte_log; + rte_log_add_in_history; + rte_log_cur_msg_loglevel; + rte_log_cur_msg_logtype; + rte_log_dump_history; + rte_log_set_history; + rte_logs; + rte_malloc; + rte_malloc_dump_stats; + rte_malloc_get_socket_stats; + rte_malloc_set_limit; + rte_malloc_socket; + rte_malloc_validate; + rte_malloc_virt2phy; + rte_mem_lock_page; + rte_mem_phy2mch; + rte_mem_virt2phy; + rte_memdump; + rte_memory_get_nchannel; + rte_memory_get_nrank; + rte_memzone_dump; + rte_memzone_lookup; + rte_memzone_reserve; + rte_memzone_reserve_aligned; + rte_memzone_reserve_bounded; + rte_memzone_walk; + rte_openlog_stream; + rte_realloc; + rte_set_application_usage_hook; + rte_set_log_level; + rte_set_log_type; + rte_socket_id; + rte_strerror; + rte_strsplit; + rte_sys_gettid; + rte_thread_get_affinity; + rte_thread_set_affinity; + rte_vlog; + rte_xen_dom0_memory_attach; + rte_xen_dom0_memory_init; + rte_zmalloc; + rte_zmalloc_socket; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_eal_pci_detach; + rte_eal_pci_read_config; + rte_eal_pci_write_config; + rte_epoll_ctl; + rte_epoll_wait; + rte_intr_allow_others; + rte_intr_dp_is_en; + rte_intr_efd_disable; + rte_intr_efd_enable; + rte_intr_rx_ctl; + rte_intr_tls_epfd; + rte_memzone_free; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_intr_cap_multiple; + rte_keepalive_create; + rte_keepalive_dispatch_pings; + rte_keepalive_mark_alive; + rte_keepalive_register_core; + rte_xen_dom0_supported; + +} DPDK_2.1; + +DPDK_16.04 { + global: + + rte_cpu_get_flag_name; + rte_eal_pci_ioport_map; + rte_eal_pci_ioport_read; + rte_eal_pci_ioport_unmap; + rte_eal_pci_ioport_write; + rte_eal_pci_map_device; + rte_eal_pci_unmap_device; + rte_eal_primary_proc_alive; + +} DPDK_2.2; diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile new file mode 100644 index 00000000..ec6e702f --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = igb_uio +MODULE_PATH = drivers/net/igb_uio + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := igb_uio.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h new file mode 100644 index 00000000..c1d45a66 --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -0,0 +1,116 @@ +/* + * Minimal wrappers to allow compiling igb_uio on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) +#define pci_cfg_access_lock pci_block_user_cfg_access +#define pci_cfg_access_unlock pci_unblock_user_cfg_access +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) +#define HAVE_PTE_MASK_PAGE_IOMAP +#endif + +#ifndef PCI_MSIX_ENTRY_SIZE +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) + +static int pci_num_vf(struct pci_dev *dev) +{ + struct iov { + int pos; + int nres; + u32 cap; + u16 ctrl; + u16 total; + u16 initial; + u16 nr_virtfn; + } *iov = (struct iov *)dev->sriov; + + if (!dev->is_physfn) + return 0; + + return iov->nr_virtfn; +} + +#endif /* < 2.6.34 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3))) + +/* Check if INTX works to control irq's. + * Set's INTX_DISABLE flag and reads it back + */ +static bool pci_intx_mask_supported(struct pci_dev *pdev) +{ + bool mask_supported = false; + uint16_t orig, new; + + pci_block_user_cfg_access(pdev); + pci_read_config_word(pdev, PCI_COMMAND, &orig); + pci_write_config_word(pdev, PCI_COMMAND, + orig ^ PCI_COMMAND_INTX_DISABLE); + pci_read_config_word(pdev, PCI_COMMAND, &new); + + if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { + dev_err(&pdev->dev, "Command register changed from " + "0x%x to 0x%x: driver or hardware bug?\n", orig, new); + } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { + mask_supported = true; + pci_write_config_word(pdev, PCI_COMMAND, orig); + } + pci_unblock_user_cfg_access(pdev); + + return mask_supported; +} + +static bool pci_check_and_mask_intx(struct pci_dev *pdev) +{ + bool pending; + uint32_t status; + + pci_block_user_cfg_access(pdev); + pci_read_config_dword(pdev, PCI_COMMAND, &status); + + /* interrupt is not ours, goes to out */ + pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0); + if (pending) { + uint16_t old, new; + + old = status; + if (status != 0) + new = old & (~PCI_COMMAND_INTX_DISABLE); + else + new = old | PCI_COMMAND_INTX_DISABLE; + + if (old != new) + pci_write_config_word(pdev, PCI_COMMAND, new); + } + pci_unblock_user_cfg_access(pdev); + + return pending; +} + +#endif /* < 3.3.0 */ diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c new file mode 100644 index 00000000..72b26923 --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -0,0 +1,580 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_XEN_DOM0 +#include +#endif +#include + +#include "compat.h" + +/** + * A structure describing the private information for a uio device. + */ +struct rte_uio_pci_dev { + struct uio_info info; + struct pci_dev *pdev; + enum rte_intr_mode mode; +}; + +static char *intr_mode; +static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; + +/* sriov sysfs */ +static ssize_t +show_max_vfs(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 10, "%u\n", dev_num_vf(dev)); +} + +static ssize_t +store_max_vfs(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int err = 0; + unsigned long max_vfs; + struct pci_dev *pdev = to_pci_dev(dev); + + if (0 != kstrtoul(buf, 0, &max_vfs)) + return -EINVAL; + + if (0 == max_vfs) + pci_disable_sriov(pdev); + else if (0 == pci_num_vf(pdev)) + err = pci_enable_sriov(pdev, max_vfs); + else /* do nothing if change max_vfs number */ + err = -EINVAL; + + return err ? err : count; +} + +#ifdef RTE_PCI_CONFIG +static ssize_t +show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +store_extended_tag(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +show_max_read_request_size(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +store_max_read_request_size(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} +#endif + +static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); +#ifdef RTE_PCI_CONFIG +static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, + store_extended_tag); +static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, + show_max_read_request_size, store_max_read_request_size); +#endif + +static struct attribute *dev_attrs[] = { + &dev_attr_max_vfs.attr, +#ifdef RTE_PCI_CONFIG + &dev_attr_extended_tag.attr, + &dev_attr_max_read_request_size.attr, +#endif + NULL, +}; + +static const struct attribute_group dev_attr_grp = { + .attrs = dev_attrs, +}; +/* + * It masks the msix on/off of generating MSI-X messages. + */ +static void +igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + + if (state != 0) + mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + else + mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + + if (mask_bits != desc->masked) { + writel(mask_bits, desc->mask_base + offset); + readl(desc->mask_base); + desc->masked = mask_bits; + } +} + +/** + * This is the irqcontrol callback to be registered to uio_info. + * It can be used to disable/enable interrupt from user space processes. + * + * @param info + * pointer to uio_info. + * @param irq_state + * state value. 1 to enable interrupt, 0 to disable interrupt. + * + * @return + * - On success, 0. + * - On failure, a negative value. + */ +static int +igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *pdev = udev->pdev; + + pci_cfg_access_lock(pdev); + if (udev->mode == RTE_INTR_MODE_LEGACY) + pci_intx(pdev, !!irq_state); + + else if (udev->mode == RTE_INTR_MODE_MSIX) { + struct msi_desc *desc; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) + list_for_each_entry(desc, &pdev->msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); +#else + list_for_each_entry(desc, &pdev->dev.msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); +#endif + } + pci_cfg_access_unlock(pdev); + + return 0; +} + +/** + * This is interrupt handler which will check if the interrupt is for the right device. + * If yes, disable it here and will be enable later. + */ +static irqreturn_t +igbuio_pci_irqhandler(int irq, struct uio_info *info) +{ + struct rte_uio_pci_dev *udev = info->priv; + + /* Legacy mode need to mask in hardware */ + if (udev->mode == RTE_INTR_MODE_LEGACY && + !pci_check_and_mask_intx(udev->pdev)) + return IRQ_NONE; + + /* Message signal mode, no share IRQ and automasked */ + return IRQ_HANDLED; +} + +#ifdef CONFIG_XEN_DOM0 +static int +igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + + idx = (int)vma->vm_pgoff; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#ifdef HAVE_PTE_MASK_PAGE_IOMAP + vma->vm_page_prot.pgprot |= _PAGE_IOMAP; +#endif + + return remap_pfn_range(vma, + vma->vm_start, + info->mem[idx].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +/** + * This is uio device mmap method which will use igbuio mmap for Xen + * Dom0 environment. + */ +static int +igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + + if (vma->vm_pgoff >= MAX_UIO_MAPS) + return -EINVAL; + + if (info->mem[vma->vm_pgoff].size == 0) + return -EINVAL; + + idx = (int)vma->vm_pgoff; + switch (info->mem[idx].memtype) { + case UIO_MEM_PHYS: + return igbuio_dom0_mmap_phys(info, vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + default: + return -EINVAL; + } +} +#endif + +/* Remap pci resources described by bar #pci_bar in uio resource n. */ +static int +igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, + int n, int pci_bar, const char *name) +{ + unsigned long addr, len; + void *internal_addr; + + if (n >= ARRAY_SIZE(info->mem)) + return -EINVAL; + + addr = pci_resource_start(dev, pci_bar); + len = pci_resource_len(dev, pci_bar); + if (addr == 0 || len == 0) + return -1; + internal_addr = ioremap(addr, len); + if (internal_addr == NULL) + return -1; + info->mem[n].name = name; + info->mem[n].addr = addr; + info->mem[n].internal_addr = internal_addr; + info->mem[n].size = len; + info->mem[n].memtype = UIO_MEM_PHYS; + return 0; +} + +/* Get pci port io resources described by bar #pci_bar in uio resource n. */ +static int +igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info, + int n, int pci_bar, const char *name) +{ + unsigned long addr, len; + + if (n >= ARRAY_SIZE(info->port)) + return -EINVAL; + + addr = pci_resource_start(dev, pci_bar); + len = pci_resource_len(dev, pci_bar); + if (addr == 0 || len == 0) + return -EINVAL; + + info->port[n].name = name; + info->port[n].start = addr; + info->port[n].size = len; + info->port[n].porttype = UIO_PORT_X86; + + return 0; +} + +/* Unmap previously ioremap'd resources */ +static void +igbuio_pci_release_iomem(struct uio_info *info) +{ + int i; + + for (i = 0; i < MAX_UIO_MAPS; i++) { + if (info->mem[i].internal_addr) + iounmap(info->mem[i].internal_addr); + } +} + +static int +igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) +{ + int i, iom, iop, ret; + unsigned long flags; + static const char *bar_names[PCI_STD_RESOURCE_END + 1] = { + "BAR0", + "BAR1", + "BAR2", + "BAR3", + "BAR4", + "BAR5", + }; + + iom = 0; + iop = 0; + + for (i = 0; i < ARRAY_SIZE(bar_names); i++) { + if (pci_resource_len(dev, i) != 0 && + pci_resource_start(dev, i) != 0) { + flags = pci_resource_flags(dev, i); + if (flags & IORESOURCE_MEM) { + ret = igbuio_pci_setup_iomem(dev, info, iom, + i, bar_names[i]); + if (ret != 0) + return ret; + iom++; + } else if (flags & IORESOURCE_IO) { + ret = igbuio_pci_setup_ioport(dev, info, iop, + i, bar_names[i]); + if (ret != 0) + return ret; + iop++; + } + } + } + + return (iom != 0) ? ret : -ENOENT; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +static int __devinit +#else +static int +#endif +igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct rte_uio_pci_dev *udev; + struct msix_entry msix_entry; + int err; + + udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); + if (!udev) + return -ENOMEM; + + /* + * enable device: ask low-level code to enable I/O and + * memory + */ + err = pci_enable_device(dev); + if (err != 0) { + dev_err(&dev->dev, "Cannot enable PCI device\n"); + goto fail_free; + } + + /* + * reserve device's PCI memory regions for use by this + * module + */ + err = pci_request_regions(dev, "igb_uio"); + if (err != 0) { + dev_err(&dev->dev, "Cannot request regions\n"); + goto fail_disable; + } + + /* enable bus mastering on the device */ + pci_set_master(dev); + + /* remap IO memory */ + err = igbuio_setup_bars(dev, &udev->info); + if (err != 0) + goto fail_release_iomem; + + /* set 64-bit DMA mask */ + err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (err != 0) { + dev_err(&dev->dev, "Cannot set DMA mask\n"); + goto fail_release_iomem; + } + + err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); + if (err != 0) { + dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); + goto fail_release_iomem; + } + + /* fill uio infos */ + udev->info.name = "igb_uio"; + udev->info.version = "0.1"; + udev->info.handler = igbuio_pci_irqhandler; + udev->info.irqcontrol = igbuio_pci_irqcontrol; +#ifdef CONFIG_XEN_DOM0 + /* check if the driver run on Xen Dom0 */ + if (xen_initial_domain()) + udev->info.mmap = igbuio_dom0_pci_mmap; +#endif + udev->info.priv = udev; + udev->pdev = dev; + + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ + msix_entry.entry = 0; + if (pci_enable_msix(dev, &msix_entry, 1) == 0) { + dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(dev)) { + dev_dbg(&dev->dev, "using INTX"); + udev->info.irq_flags = IRQF_SHARED; + udev->info.irq = dev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&dev->dev, "PCI INTX mask not supported\n"); + /* fall back to no IRQ */ + case RTE_INTR_MODE_NONE: + udev->mode = RTE_INTR_MODE_NONE; + udev->info.irq = 0; + break; + + default: + dev_err(&dev->dev, "invalid IRQ mode %u", + igbuio_intr_mode_preferred); + err = -EINVAL; + goto fail_release_iomem; + } + + err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); + if (err != 0) + goto fail_release_iomem; + + /* register uio driver */ + err = uio_register_device(&dev->dev, &udev->info); + if (err != 0) + goto fail_remove_group; + + pci_set_drvdata(dev, udev); + + dev_info(&dev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + + return 0; + +fail_remove_group: + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); +fail_release_iomem: + igbuio_pci_release_iomem(&udev->info); + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + pci_release_regions(dev); +fail_disable: + pci_disable_device(dev); +fail_free: + kfree(udev); + + return err; +} + +static void +igbuio_pci_remove(struct pci_dev *dev) +{ + struct rte_uio_pci_dev *udev = pci_get_drvdata(dev); + + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); + uio_unregister_device(&udev->info); + igbuio_pci_release_iomem(&udev->info); + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(dev); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + kfree(udev); +} + +static int +igbuio_config_intr_mode(char *intr_str) +{ + if (!intr_str) { + pr_info("Use MSIX interrupt by default\n"); + return 0; + } + + if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; + pr_info("Use MSIX interrupt\n"); + } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; + pr_info("Use legacy interrupt\n"); + } else { + pr_info("Error: bad parameter - %s\n", intr_str); + return -EINVAL; + } + + return 0; +} + +static struct pci_driver igbuio_pci_driver = { + .name = "igb_uio", + .id_table = NULL, + .probe = igbuio_pci_probe, + .remove = igbuio_pci_remove, +}; + +static int __init +igbuio_pci_init_module(void) +{ + int ret; + + ret = igbuio_config_intr_mode(intr_mode); + if (ret < 0) + return ret; + + return pci_register_driver(&igbuio_pci_driver); +} + +static void __exit +igbuio_pci_exit_module(void) +{ + pci_unregister_driver(&igbuio_pci_driver); +} + +module_init(igbuio_pci_init_module); +module_exit(igbuio_pci_exit_module); + +module_param(intr_mode, charp, S_IRUGO); +MODULE_PARM_DESC(intr_mode, +"igb_uio interrupt mode (default=msix):\n" +" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" +" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" +"\n"); + +MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile new file mode 100644 index 00000000..ac99d3f1 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -0,0 +1,93 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = rte_kni + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h +MODULE_CFLAGS += -Wall -Werror + +ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) +MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) +UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ + | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1) +MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" +endif + +# this lib needs main eal +DEPDIRS-y += lib/librte_eal/linuxapp/eal + +# +# all source are stored in SRCS-y +# +SRCS-y := ethtool/ixgbe/ixgbe_main.c +SRCS-y += ethtool/ixgbe/ixgbe_api.c +SRCS-y += ethtool/ixgbe/ixgbe_common.c +SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c +SRCS-y += ethtool/ixgbe/ixgbe_82599.c +SRCS-y += ethtool/ixgbe/ixgbe_82598.c +SRCS-y += ethtool/ixgbe/ixgbe_x540.c +SRCS-y += ethtool/ixgbe/ixgbe_phy.c +SRCS-y += ethtool/ixgbe/kcompat.c + +SRCS-y += ethtool/igb/e1000_82575.c +SRCS-y += ethtool/igb/e1000_i210.c +SRCS-y += ethtool/igb/e1000_api.c +SRCS-y += ethtool/igb/e1000_mac.c +SRCS-y += ethtool/igb/e1000_manage.c +SRCS-y += ethtool/igb/e1000_mbx.c +SRCS-y += ethtool/igb/e1000_nvm.c +SRCS-y += ethtool/igb/e1000_phy.c +SRCS-y += ethtool/igb/igb_ethtool.c +SRCS-y += ethtool/igb/igb_hwmon.c +SRCS-y += ethtool/igb/igb_main.c +SRCS-y += ethtool/igb/igb_debugfs.c +SRCS-y += ethtool/igb/igb_param.c +SRCS-y += ethtool/igb/igb_procfs.c +SRCS-y += ethtool/igb/igb_vmdq.c +#SRCS-y += ethtool/igb/igb_ptp.c +#SRCS-y += ethtool/igb/kcompat.c + +SRCS-y += kni_misc.c +SRCS-y += kni_net.c +SRCS-y += kni_ethtool.c +SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h new file mode 100644 index 00000000..cf100b67 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -0,0 +1,29 @@ +/* + * Minimal wrappers to allow compiling kni on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) + +#define sk_sleep(s) (s)->sk_sleep + +#endif /* < 2.6.35 */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) +#define HAVE_IOV_ITER_MSGHDR +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) ) +#define HAVE_KIOCB_MSG_PARAM +#endif /* < 4.1.0 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README new file mode 100644 index 00000000..2cfefe72 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/README @@ -0,0 +1,100 @@ +.. + BSD LICENSE + + Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Description + +In order to support ethtool in Kernel NIC Interface, the standard Linux kernel +drivers of ixgbe/igb are needed to be reused here. ixgbe-3.9.17 is the version +modified from in kernel NIC interface kernel module to support ixgbe NIC, and +igb-3.4.8 is the version modified from in kernel NIC interface kernel module to +support igb NIC. + +The source code package of ixgbe can be downloaded from sourceforge.net as below. +http://sourceforge.net/projects/e1000/files/ixgbe%20stable/ +Below source files are copied or modified from ixgbe. + +ixgbe_82598.h +ixgbe_82599.c +ixgbe_82599.h +ixgbe_api.c +ixgbe_api.h +ixgbe_common.c +ixgbe_common.h +ixgbe_dcb.h +ixgbe_ethtool.c +ixgbe_fcoe.h +ixgbe.h +ixgbe_main.c +ixgbe_mbx.h +ixgbe_osdep.h +ixgbe_phy.c +ixgbe_phy.h +ixgbe_sriov.h +ixgbe_type.h +kcompat.c +kcompat.h + +The source code package of igb can be downloaded from sourceforge.net as below. +http://sourceforge.net/projects/e1000/files/igb%20stable/ +Below source files are copied or modified from igb. + +e1000_82575.c +e1000_82575.h +e1000_api.c +e1000_api.h +e1000_defines.h +e1000_hw.h +e1000_mac.c +e1000_mac.h +e1000_manage.c +e1000_manage.h +e1000_mbx.c +e1000_mbx.h +e1000_nvm.c +e1000_nvm.h +e1000_osdep.h +e1000_phy.c +e1000_phy.h +e1000_regs.h +igb_ethtool.c +igb.h +igb_main.c +igb_param.c +igb_procfs.c +igb_regtest.h +igb_sysfs.c +igb_vmdq.c +igb_vmdq.h +kcompat.c +kcompat_ethtool.c +kcompat.h + diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING new file mode 100644 index 00000000..5f297e5b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING @@ -0,0 +1,339 @@ + +"This software program is licensed subject to the GNU General Public License +(GPL). Version 2, June 1991, available at +" + +GNU General Public License + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public License is intended +to guarantee your freedom to share and change free software--to make sure +the software is free for all its users. This General Public License applies +to most of the Free Software Foundation's software and to any other program +whose authors commit to using it. (Some other Free Software Foundation +software is covered by the GNU Library General Public License instead.) You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. These +restrictions translate to certain responsibilities for you if you distribute +copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its +recipients to know that what they have is not the original, so that any +problems introduced by others will not reflect on the original authors' +reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program +proprietary. To prevent this, we have made it clear that any patent must be +licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice + placed by the copyright holder saying it may be distributed under the + terms of this General Public License. The "Program", below, refers to any + such program or work, and a "work based on the Program" means either the + Program or any derivative work under copyright law: that is to say, a + work containing the Program or a portion of it, either verbatim or with + modifications and/or translated into another language. (Hereinafter, + translation is included without limitation in the term "modification".) + Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + the Program is not restricted, and the output from the Program is covered + only if its contents constitute a work based on the Program (independent + of having been made by running the Program). Whether that is true depends + on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code + as you receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice and + disclaimer of warranty; keep intact all the notices that refer to this + License and to the absence of any warranty; and give any other recipients + of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you + may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, + thus forming a work based on the Program, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + * a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + * b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any part + thereof, to be licensed as a whole at no charge to all third parties + under the terms of this License. + + * c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive + use in the most ordinary way, to print or display an announcement + including an appropriate copyright notice and a notice that there is + no warranty (or else, saying that you provide a warranty) and that + users may redistribute the program under these conditions, and + telling the user how to view a copy of this License. (Exception: if + the Program itself is interactive but does not normally print such + an announcement, your work based on the Program is not required to + print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable + sections of that work are not derived from the Program, and can be + reasonably considered independent and separate works in themselves, then + this License, and its terms, do not apply to those sections when you + distribute them as separate works. But when you distribute the same + sections as part of a whole which is a work based on the Program, the + distribution of the whole must be on the terms of this License, whose + permissions for other licensees extend to the entire whole, and thus to + each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program + with the Program (or with a work based on the Program) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under + Section 2) in object code or executable form under the terms of Sections + 1 and 2 above provided that you also do one of the following: + + * a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 + above on a medium customarily used for software interchange; or, + + * b) Accompany it with a written offer, valid for at least three years, + to give any third party, for a charge no more than your cost of + physically performing source distribution, a complete machine- + readable copy of the corresponding source code, to be distributed + under the terms of Sections 1 and 2 above on a medium customarily + used for software interchange; or, + + * c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed + only for noncommercial distribution and only if you received the + program in object code or executable form with such an offer, in + accord with Subsection b above.) + + The source code for a work means the preferred form of the work for + making modifications to it. For an executable work, complete source code + means all the source code for all modules it contains, plus any + associated interface definition files, plus the scripts used to control + compilation and installation of the executable. However, as a special + exception, the source code distributed need not include anything that is + normally distributed (in either source or binary form) with the major + components (compiler, kernel, and so on) of the operating system on which + the executable runs, unless that component itself accompanies the + executable. + + If distribution of executable or object code is made by offering access + to copy from a designated place, then offering equivalent access to copy + the source code from the same place counts as distribution of the source + code, even though third parties are not compelled to copy the source + along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as + expressly provided under this License. Any attempt otherwise to copy, + modify, sublicense or distribute the Program is void, and will + automatically terminate your rights under this License. However, parties + who have received copies, or rights, from you under this License will not + have their licenses terminated so long as such parties remain in full + compliance. + +5. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Program or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Program (or any work based on the Program), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Program or works + based on it. + +6. Each time you redistribute the Program (or any work based on the + Program), the recipient automatically receives a license from the + original licensor to copy, distribute or modify the Program subject to + these terms and conditions. You may not impose any further restrictions + on the recipients' exercise of the rights granted herein. You are not + responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot distribute + so as to satisfy simultaneously your obligations under this License and + any other pertinent obligations, then as a consequence you may not + distribute the Program at all. For example, if a patent license would + not permit royalty-free redistribution of the Program by all those who + receive copies directly or indirectly through you, then the only way you + could satisfy both it and this License would be to refrain entirely from + distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any + particular circumstance, the balance of the section is intended to apply + and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system, which is implemented + by public license practices. Many people have made generous contributions + to the wide range of software distributed through that system in + reliance on consistent application of that system; it is up to the + author/donor to decide if he or she is willing to distribute software + through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be + a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Program under this License may add an + explicit geographical distribution limitation excluding those countries, + so that distribution is permitted only in or among countries not thus + excluded. In such case, this License incorporates the limitation as if + written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of + the General Public License from time to time. Such new versions will be + similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Program does not specify a version + number of this License, you may choose any version ever published by the + Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs + whose distribution conditions are different, write to the author to ask + for permission. For software which is copyrighted by the Free Software + Foundation, write to the Free Software Foundation; we sometimes make + exceptions for this. Our decision will be guided by the two goals of + preserving the free status of all derivatives of our free software and + of promoting the sharing and reuse of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH + YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it free +software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey the +exclusion of warranty; and each file should have at least the "copyright" +line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free +software, and you are welcome to redistribute it under certain conditions; +type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program +'Gnomovision' (which makes passes at compilers) written by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c new file mode 100644 index 00000000..b8c9a13f --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -0,0 +1,3665 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* + * 82575EB Gigabit Network Connection + * 82575EB Gigabit Backplane Connection + * 82575GB Gigabit Network Connection + * 82576 Gigabit Network Connection + * 82576 Quad Port Gigabit Mezzanine Adapter + * 82580 Gigabit Network Connection + * I350 Gigabit Network Connection + */ + +#include "e1000_api.h" +#include "e1000_i210.h" + +static s32 e1000_init_phy_params_82575(struct e1000_hw *hw); +static s32 e1000_init_mac_params_82575(struct e1000_hw *hw); +static s32 e1000_acquire_phy_82575(struct e1000_hw *hw); +static void e1000_release_phy_82575(struct e1000_hw *hw); +static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw); +static void e1000_release_nvm_82575(struct e1000_hw *hw); +static s32 e1000_check_for_link_82575(struct e1000_hw *hw); +static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw); +static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw); +static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, + u16 *duplex); +static s32 e1000_init_hw_82575(struct e1000_hw *hw); +static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw); +static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 *data); +static s32 e1000_reset_hw_82575(struct e1000_hw *hw); +static s32 e1000_reset_hw_82580(struct e1000_hw *hw); +static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, + u32 offset, u16 *data); +static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, + u32 offset, u16 data); +static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, + bool active); +static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, + bool active); +static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, + bool active); +static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw); +static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw); +static s32 e1000_get_media_type_82575(struct e1000_hw *hw); +static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw); +static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); +static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, + u32 offset, u16 data); +static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); +static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, + u16 *speed, u16 *duplex); +static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); +static bool e1000_sgmii_active_82575(struct e1000_hw *hw); +static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); +static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); +static void e1000_config_collision_dist_82575(struct e1000_hw *hw); +static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); +static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw); +static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw); +static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); +static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw); +static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, + u16 offset); +static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, + u16 offset); +static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); +static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); +static void e1000_clear_vfta_i350(struct e1000_hw *hw); + +static void e1000_i2c_start(struct e1000_hw *hw); +static void e1000_i2c_stop(struct e1000_hw *hw); +static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data); +static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data); +static s32 e1000_get_i2c_ack(struct e1000_hw *hw); +static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data); +static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data); +static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); +static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); +static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data); +static bool e1000_get_i2c_data(u32 *i2cctl); + +static const u16 e1000_82580_rxpbs_table[] = { + 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; +#define E1000_82580_RXPBS_TABLE_SIZE \ + (sizeof(e1000_82580_rxpbs_table)/sizeof(u16)) + + +/** + * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO + * @hw: pointer to the HW structure + * + * Called to determine if the I2C pins are being used for I2C or as an + * external MDIO interface since the two options are mutually exclusive. + **/ +static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw) +{ + u32 reg = 0; + bool ext_mdio = false; + + DEBUGFUNC("e1000_sgmii_uses_mdio_82575"); + + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + reg = E1000_READ_REG(hw, E1000_MDIC); + ext_mdio = !!(reg & E1000_MDIC_DEST); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + reg = E1000_READ_REG(hw, E1000_MDICNFG); + ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); + break; + default: + break; + } + return ext_mdio; +} + +/** + * e1000_init_phy_params_82575 - Init PHY func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 e1000_init_phy_params_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext; + + DEBUGFUNC("e1000_init_phy_params_82575"); + + phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic; + phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic; + + if (hw->phy.media_type != e1000_media_type_copper) { + phy->type = e1000_phy_none; + goto out; + } + + phy->ops.power_up = e1000_power_up_phy_copper; + phy->ops.power_down = e1000_power_down_phy_copper_82575; + + phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; + phy->reset_delay_us = 100; + + phy->ops.acquire = e1000_acquire_phy_82575; + phy->ops.check_reset_block = e1000_check_reset_block_generic; + phy->ops.commit = e1000_phy_sw_reset_generic; + phy->ops.get_cfg_done = e1000_get_cfg_done_82575; + phy->ops.release = e1000_release_phy_82575; + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + + if (e1000_sgmii_active_82575(hw)) { + phy->ops.reset = e1000_phy_hw_reset_sgmii_82575; + ctrl_ext |= E1000_CTRL_I2C_ENA; + } else { + phy->ops.reset = e1000_phy_hw_reset_generic; + ctrl_ext &= ~E1000_CTRL_I2C_ENA; + } + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + e1000_reset_mdicnfg_82580(hw); + + if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) { + phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575; + phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575; + } else { + switch (hw->mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i354: + phy->ops.read_reg = e1000_read_phy_reg_82580; + phy->ops.write_reg = e1000_write_phy_reg_82580; + break; + case e1000_i210: + case e1000_i211: + phy->ops.read_reg = e1000_read_phy_reg_gs40g; + phy->ops.write_reg = e1000_write_phy_reg_gs40g; + break; + default: + phy->ops.read_reg = e1000_read_phy_reg_igp; + phy->ops.write_reg = e1000_write_phy_reg_igp; + } + } + + /* Set phy->phy_addr and phy->id. */ + ret_val = e1000_get_phy_id_82575(hw); + + /* Verify phy id and set remaining function pointers */ + switch (phy->id) { + case M88E1543_E_PHY_ID: + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1111_I_PHY_ID: + phy->type = e1000_phy_m88; + phy->ops.check_polarity = e1000_check_polarity_m88; + phy->ops.get_info = e1000_get_phy_info_m88; + if (phy->id == I347AT4_E_PHY_ID || + phy->id == M88E1112_E_PHY_ID || + phy->id == M88E1340M_E_PHY_ID) + phy->ops.get_cable_length = + e1000_get_cable_length_m88_gen2; + else if (phy->id == M88E1543_E_PHY_ID) + phy->ops.get_cable_length = + e1000_get_cable_length_m88_gen2; + else + phy->ops.get_cable_length = e1000_get_cable_length_m88; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; + /* Check if this PHY is confgured for media swap. */ + if (phy->id == M88E1112_E_PHY_ID) { + u16 data; + + ret_val = phy->ops.write_reg(hw, + E1000_M88E1112_PAGE_ADDR, + 2); + if (ret_val) + goto out; + + ret_val = phy->ops.read_reg(hw, + E1000_M88E1112_MAC_CTRL_1, + &data); + if (ret_val) + goto out; + + data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> + E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; + if (data == E1000_M88E1112_AUTO_COPPER_SGMII || + data == E1000_M88E1112_AUTO_COPPER_BASEX) + hw->mac.ops.check_for_link = + e1000_check_for_link_media_swap; + } + break; + case IGP03E1000_E_PHY_ID: + case IGP04E1000_E_PHY_ID: + phy->type = e1000_phy_igp_3; + phy->ops.check_polarity = e1000_check_polarity_igp; + phy->ops.get_info = e1000_get_phy_info_igp; + phy->ops.get_cable_length = e1000_get_cable_length_igp_2; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; + break; + case I82580_I_PHY_ID: + case I350_I_PHY_ID: + phy->type = e1000_phy_82580; + phy->ops.check_polarity = e1000_check_polarity_82577; + phy->ops.force_speed_duplex = + e1000_phy_force_speed_duplex_82577; + phy->ops.get_cable_length = e1000_get_cable_length_82577; + phy->ops.get_info = e1000_get_phy_info_82577; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; + break; + case I210_I_PHY_ID: + phy->type = e1000_phy_i210; + phy->ops.check_polarity = e1000_check_polarity_m88; + phy->ops.get_info = e1000_get_phy_info_m88; + phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; + break; + default: + ret_val = -E1000_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_init_nvm_params_82575 - Init NVM func ptrs. + * @hw: pointer to the HW structure + **/ +s32 e1000_init_nvm_params_82575(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u16 size; + + DEBUGFUNC("e1000_init_nvm_params_82575"); + + size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> + E1000_EECD_SIZE_EX_SHIFT); + /* + * Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ + size += NVM_WORD_SIZE_BASE_SHIFT; + + /* Just in case size is out of range, cap it to the largest + * EEPROM size supported + */ + if (size > 15) + size = 15; + + nvm->word_size = 1 << size; + if (hw->mac.type < e1000_i210) { + nvm->opcode_bits = 8; + nvm->delay_usec = 1; + + switch (nvm->override) { + case e1000_nvm_override_spi_large: + nvm->page_size = 32; + nvm->address_bits = 16; + break; + case e1000_nvm_override_spi_small: + nvm->page_size = 8; + nvm->address_bits = 8; + break; + default: + nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; + nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? + 16 : 8; + break; + } + if (nvm->word_size == (1 << 15)) + nvm->page_size = 128; + + nvm->type = e1000_nvm_eeprom_spi; + } else { + nvm->type = e1000_nvm_flash_hw; + } + + /* Function Pointers */ + nvm->ops.acquire = e1000_acquire_nvm_82575; + nvm->ops.release = e1000_release_nvm_82575; + if (nvm->word_size < (1 << 15)) + nvm->ops.read = e1000_read_nvm_eerd; + else + nvm->ops.read = e1000_read_nvm_spi; + + nvm->ops.write = e1000_write_nvm_spi; + nvm->ops.validate = e1000_validate_nvm_checksum_generic; + nvm->ops.update = e1000_update_nvm_checksum_generic; + nvm->ops.valid_led_default = e1000_valid_led_default_82575; + + /* override generic family function pointers for specific descendants */ + switch (hw->mac.type) { + case e1000_82580: + nvm->ops.validate = e1000_validate_nvm_checksum_82580; + nvm->ops.update = e1000_update_nvm_checksum_82580; + break; + case e1000_i350: + //case e1000_i354: + nvm->ops.validate = e1000_validate_nvm_checksum_i350; + nvm->ops.update = e1000_update_nvm_checksum_i350; + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_init_mac_params_82575 - Init MAC func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + + DEBUGFUNC("e1000_init_mac_params_82575"); + + /* Derives media type */ + e1000_get_media_type_82575(hw); + /* Set mta register count */ + mac->mta_reg_count = 128; + /* Set uta register count */ + mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; + /* Set rar entry count */ + mac->rar_entry_count = E1000_RAR_ENTRIES_82575; + if (mac->type == e1000_82576) + mac->rar_entry_count = E1000_RAR_ENTRIES_82576; + if (mac->type == e1000_82580) + mac->rar_entry_count = E1000_RAR_ENTRIES_82580; + if (mac->type == e1000_i350 || mac->type == e1000_i354) + mac->rar_entry_count = E1000_RAR_ENTRIES_I350; + + /* Enable EEE default settings for EEE supported devices */ + if (mac->type >= e1000_i350) + dev_spec->eee_disable = false; + + /* Allow a single clear of the SW semaphore on I210 and newer */ + if (mac->type >= e1000_i210) + dev_spec->clear_semaphore_once = true; + + /* Set if part includes ASF firmware */ + mac->asf_firmware_present = true; + /* FWSM register */ + mac->has_fwsm = true; + /* ARC supported; valid only if manageability features are enabled. */ + mac->arc_subsystem_valid = + !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); + + /* Function pointers */ + + /* bus type/speed/width */ + mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; + /* reset */ + if (mac->type >= e1000_82580) + mac->ops.reset_hw = e1000_reset_hw_82580; + else + mac->ops.reset_hw = e1000_reset_hw_82575; + /* hw initialization */ + mac->ops.init_hw = e1000_init_hw_82575; + /* link setup */ + mac->ops.setup_link = e1000_setup_link_generic; + /* physical interface link setup */ + mac->ops.setup_physical_interface = + (hw->phy.media_type == e1000_media_type_copper) + ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575; + /* physical interface shutdown */ + mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575; + /* physical interface power up */ + mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575; + /* check for link */ + mac->ops.check_for_link = e1000_check_for_link_82575; + /* read mac address */ + mac->ops.read_mac_addr = e1000_read_mac_addr_82575; + /* configure collision distance */ + mac->ops.config_collision_dist = e1000_config_collision_dist_82575; + /* multicast address update */ + mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; + if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) { + /* writing VFTA */ + mac->ops.write_vfta = e1000_write_vfta_i350; + /* clearing VFTA */ + mac->ops.clear_vfta = e1000_clear_vfta_i350; + } else { + /* writing VFTA */ + mac->ops.write_vfta = e1000_write_vfta_generic; + /* clearing VFTA */ + mac->ops.clear_vfta = e1000_clear_vfta_generic; + } + if (hw->mac.type >= e1000_82580) + mac->ops.validate_mdi_setting = + e1000_validate_mdi_setting_crossover_generic; + /* ID LED init */ + mac->ops.id_led_init = e1000_id_led_init_generic; + /* blink LED */ + mac->ops.blink_led = e1000_blink_led_generic; + /* setup LED */ + mac->ops.setup_led = e1000_setup_led_generic; + /* cleanup LED */ + mac->ops.cleanup_led = e1000_cleanup_led_generic; + /* turn on/off LED */ + mac->ops.led_on = e1000_led_on_generic; + mac->ops.led_off = e1000_led_off_generic; + /* clear hardware counters */ + mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575; + /* link info */ + mac->ops.get_link_up_info = e1000_get_link_up_info_82575; + /* get thermal sensor data */ + mac->ops.get_thermal_sensor_data = + e1000_get_thermal_sensor_data_generic; + mac->ops.init_thermal_sensor_thresh = + e1000_init_thermal_sensor_thresh_generic; + /* acquire SW_FW sync */ + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; + if (mac->type >= e1000_i210) { + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; + } + + /* set lan id for port to determine which phy lock to use */ + hw->mac.ops.set_lan_id(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_init_function_pointers_82575 - Init func ptrs. + * @hw: pointer to the HW structure + * + * Called to initialize all function pointers and parameters. + **/ +void e1000_init_function_pointers_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_init_function_pointers_82575"); + + hw->mac.ops.init_params = e1000_init_mac_params_82575; + hw->nvm.ops.init_params = e1000_init_nvm_params_82575; + hw->phy.ops.init_params = e1000_init_phy_params_82575; + hw->mbx.ops.init_params = e1000_init_mbx_params_pf; +} + +/** + * e1000_acquire_phy_82575 - Acquire rights to access PHY + * @hw: pointer to the HW structure + * + * Acquire access rights to the correct PHY. + **/ +static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) +{ + u16 mask = E1000_SWFW_PHY0_SM; + + DEBUGFUNC("e1000_acquire_phy_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_SWFW_PHY2_SM; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_SWFW_PHY3_SM; + + return hw->mac.ops.acquire_swfw_sync(hw, mask); +} + +/** + * e1000_release_phy_82575 - Release rights to access PHY + * @hw: pointer to the HW structure + * + * A wrapper to release access rights to the correct PHY. + **/ +static void e1000_release_phy_82575(struct e1000_hw *hw) +{ + u16 mask = E1000_SWFW_PHY0_SM; + + DEBUGFUNC("e1000_release_phy_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_SWFW_PHY2_SM; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_SWFW_PHY3_SM; + + hw->mac.ops.release_swfw_sync(hw, mask); +} + +/** + * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the serial gigabit media independent + * interface and stores the retrieved information in data. + **/ +static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 *data) +{ + s32 ret_val = -E1000_ERR_PARAM; + + DEBUGFUNC("e1000_read_phy_reg_sgmii_82575"); + + if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { + DEBUGOUT1("PHY Address %u is out of range\n", offset); + goto out; + } + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_phy_reg_i2c(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset using the serial gigabit + * media independent interface. + **/ +static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 data) +{ + s32 ret_val = -E1000_ERR_PARAM; + + DEBUGFUNC("e1000_write_phy_reg_sgmii_82575"); + + if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + goto out; + } + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_write_phy_reg_i2c(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_get_phy_id_82575 - Retrieve PHY addr and id + * @hw: pointer to the HW structure + * + * Retrieves the PHY address and ID for both PHY's which do and do not use + * sgmi interface. + **/ +static s32 e1000_get_phy_id_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_id; + u32 ctrl_ext; + u32 mdic; + + DEBUGFUNC("e1000_get_phy_id_82575"); + + /* i354 devices can have a PHY that needs an extra read for id */ + if (hw->mac.type == e1000_i354) + e1000_get_phy_id(hw); + + + /* + * For SGMII PHYs, we try the list of possible addresses until + * we find one that works. For non-SGMII PHYs + * (e.g. integrated copper PHYs), an address of 1 should + * work. The result of this function should mean phy->phy_addr + * and phy->id are set correctly. + */ + if (!e1000_sgmii_active_82575(hw)) { + phy->addr = 1; + ret_val = e1000_get_phy_id(hw); + goto out; + } + + if (e1000_sgmii_uses_mdio_82575(hw)) { + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + mdic = E1000_READ_REG(hw, E1000_MDIC); + mdic &= E1000_MDIC_PHY_MASK; + phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + mdic = E1000_READ_REG(hw, E1000_MDICNFG); + mdic &= E1000_MDICNFG_PHY_MASK; + phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; + break; + default: + ret_val = -E1000_ERR_PHY; + goto out; + break; + } + ret_val = e1000_get_phy_id(hw); + goto out; + } + + /* Power on sgmii phy if it is disabled */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); + E1000_WRITE_FLUSH(hw); + msec_delay(300); + + /* + * The address field in the I2CCMD register is 3 bits and 0 is invalid. + * Therefore, we need to test 1-7 + */ + for (phy->addr = 1; phy->addr < 8; phy->addr++) { + ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); + if (ret_val == E1000_SUCCESS) { + DEBUGOUT2("Vendor ID 0x%08X read at address %u\n", + phy_id, phy->addr); + /* + * At the time of this writing, The M88 part is + * the only supported SGMII PHY product. + */ + if (phy_id == M88_VENDOR) + break; + } else { + DEBUGOUT1("PHY address %u was unreadable\n", + phy->addr); + } + } + + /* A valid PHY type couldn't be found. */ + if (phy->addr == 8) { + phy->addr = 0; + ret_val = -E1000_ERR_PHY; + } else { + ret_val = e1000_get_phy_id(hw); + } + + /* restore previous sfp cage power state */ + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + +out: + return ret_val; +} + +/** + * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset + * @hw: pointer to the HW structure + * + * Resets the PHY using the serial gigabit media independent interface. + **/ +static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575"); + + /* + * This isn't a true "hard" reset, but is the only reset + * available to us at this time. + */ + + DEBUGOUT("Soft resetting SGMII attached PHY...\n"); + + if (!(hw->phy.ops.write_reg)) + goto out; + + /* + * SFP documentation requires the following to configure the SPF module + * to work on SGMII. No further documentation is given. + */ + ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); + if (ret_val) + goto out; + + ret_val = hw->phy.ops.commit(hw); + +out: + return ret_val; +} + +/** + * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state + * @hw: pointer to the HW structure + * @active: true to enable LPLU, false to disable + * + * Sets the LPLU D0 state according to the active flag. When + * activating LPLU this function also disables smart speed + * and vice versa. LPLU will not be activated unless the + * device autonegotiation advertisement meets standards of + * either 10 or 10/100 or 10/100/1000 at all duplexes. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 data; + + DEBUGFUNC("e1000_set_d0_lplu_state_82575"); + + if (!(hw->phy.ops.read_reg)) + goto out; + + ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); + if (ret_val) + goto out; + + if (active) { + data |= IGP02E1000_PM_D0_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + goto out; + + /* When LPLU is enabled, we should disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + &data); + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } else { + data &= ~IGP02E1000_PM_D0_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + goto out; + + data |= IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } else if (phy->smart_speed == e1000_smart_speed_off) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + goto out; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } + } + +out: + return ret_val; +} + +/** + * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state + * @hw: pointer to the HW structure + * @active: true to enable LPLU, false to disable + * + * Sets the LPLU D0 state according to the active flag. When + * activating LPLU this function also disables smart speed + * and vice versa. LPLU will not be activated unless the + * device autonegotiation advertisement meets standards of + * either 10 or 10/100 or 10/100/1000 at all duplexes. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 data; + + DEBUGFUNC("e1000_set_d0_lplu_state_82580"); + + data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + + if (active) { + data |= E1000_82580_PM_D0_LPLU; + + /* When LPLU is enabled, we should disable SmartSpeed */ + data &= ~E1000_82580_PM_SPD; + } else { + data &= ~E1000_82580_PM_D0_LPLU; + + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) + data |= E1000_82580_PM_SPD; + else if (phy->smart_speed == e1000_smart_speed_off) + data &= ~E1000_82580_PM_SPD; + } + + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); + return ret_val; +} + +/** + * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. + **/ +s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 data; + + DEBUGFUNC("e1000_set_d3_lplu_state_82580"); + + data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + + if (!active) { + data &= ~E1000_82580_PM_D3_LPLU; + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) + data |= E1000_82580_PM_SPD; + else if (phy->smart_speed == e1000_smart_speed_off) + data &= ~E1000_82580_PM_SPD; + } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || + (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || + (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { + data |= E1000_82580_PM_D3_LPLU; + /* When LPLU is enabled, we should disable SmartSpeed */ + data &= ~E1000_82580_PM_SPD; + } + + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); + return ret_val; +} + +/** + * e1000_acquire_nvm_82575 - Request for access to EEPROM + * @hw: pointer to the HW structure + * + * Acquire the necessary semaphores for exclusive access to the EEPROM. + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_acquire_nvm_82575"); + + ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + if (ret_val) + goto out; + + /* + * Check if there is some access + * error this access may hook on + */ + if (hw->mac.type == e1000_i350) { + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT | + E1000_EECD_TIMEOUT)) { + /* Clear all access error flags */ + E1000_WRITE_REG(hw, E1000_EECD, eecd | + E1000_EECD_ERROR_CLR); + DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); + } + } + if (hw->mac.type == e1000_82580) { + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + if (eecd & E1000_EECD_BLOCKED) { + /* Clear access error flag */ + E1000_WRITE_REG(hw, E1000_EECD, eecd | + E1000_EECD_BLOCKED); + DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); + } + } + + + ret_val = e1000_acquire_nvm_generic(hw); + if (ret_val) + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + +out: + return ret_val; +} + +/** + * e1000_release_nvm_82575 - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit, + * then release the semaphores acquired. + **/ +static void e1000_release_nvm_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_release_nvm_82575"); + + e1000_release_nvm_generic(hw); + + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + + DEBUGFUNC("e1000_acquire_swfw_sync_82575"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_generic(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_82575 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + **/ +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_82575"); + + while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); +} + +/** + * e1000_get_cfg_done_82575 - Read config done bit + * @hw: pointer to the HW structure + * + * Read the management control register for the config done bit for + * completion status. NOTE: silicon which is EEPROM-less will fail trying + * to read the config done bit, so an error is *ONLY* logged and returns + * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon + * would not be able to be reset or change link. + **/ +static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw) +{ + s32 timeout = PHY_CFG_TIMEOUT; + s32 ret_val = E1000_SUCCESS; + u32 mask = E1000_NVM_CFG_DONE_PORT_0; + + DEBUGFUNC("e1000_get_cfg_done_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_NVM_CFG_DONE_PORT_1; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_NVM_CFG_DONE_PORT_2; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_NVM_CFG_DONE_PORT_3; + while (timeout) { + if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) + break; + msec_delay(1); + timeout--; + } + if (!timeout) + DEBUGOUT("MNG configuration cycle has not completed.\n"); + + /* If EEPROM is not marked present, init the PHY manually */ + if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && + (hw->phy.type == e1000_phy_igp_3)) + e1000_phy_init_script_igp3(hw); + + return ret_val; +} + +/** + * e1000_get_link_up_info_82575 - Get link speed/duplex info + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * This is a wrapper function, if using the serial gigabit media independent + * interface, use PCS to retrieve the link speed and duplex information. + * Otherwise, use the generic function to get the link speed and duplex info. + **/ +static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, + u16 *duplex) +{ + s32 ret_val; + + DEBUGFUNC("e1000_get_link_up_info_82575"); + + if (hw->phy.media_type != e1000_media_type_copper) + ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed, + duplex); + else + ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, + duplex); + + return ret_val; +} + +/** + * e1000_check_for_link_82575 - Check for link + * @hw: pointer to the HW structure + * + * If sgmii is enabled, then use the pcs register to determine link, otherwise + * use the generic interface for determining link. + **/ +static s32 e1000_check_for_link_82575(struct e1000_hw *hw) +{ + s32 ret_val; + u16 speed, duplex; + + DEBUGFUNC("e1000_check_for_link_82575"); + + if (hw->phy.media_type != e1000_media_type_copper) { + ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, + &duplex); + /* + * Use this flag to determine if link needs to be checked or + * not. If we have link clear the flag so that we do not + * continue to check for link. + */ + hw->mac.get_link_status = !hw->mac.serdes_has_link; + + /* + * Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) + DEBUGOUT("Error configuring flow control\n"); + } else { + ret_val = e1000_check_for_copper_link_generic(hw); + } + + return ret_val; +} + +/** + * e1000_check_for_link_media_swap - Check which M88E1112 interface linked + * @hw: pointer to the HW structure + * + * Poll the M88E1112 interfaces to see which interface achieved link. + */ +static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + u8 port = 0; + + DEBUGFUNC("e1000_check_for_link_media_swap"); + + /* Check the copper medium. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); + if (ret_val) + return ret_val; + + if (data & E1000_M88E1112_STATUS_LINK) + port = E1000_MEDIA_PORT_COPPER; + + /* Check the other medium. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); + if (ret_val) + return ret_val; + + if (data & E1000_M88E1112_STATUS_LINK) + port = E1000_MEDIA_PORT_OTHER; + + /* Determine if a swap needs to happen. */ + if (port && (hw->dev_spec._82575.media_port != port)) { + hw->dev_spec._82575.media_port = port; + hw->dev_spec._82575.media_changed = true; + } else { + ret_val = e1000_check_for_link_82575(hw); + } + + return E1000_SUCCESS; +} + +/** + * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown + * @hw: pointer to the HW structure + **/ +static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw) +{ + u32 reg; + + DEBUGFUNC("e1000_power_up_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return; + + /* Enable PCS to turn on link */ + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg |= E1000_PCS_CFG_PCS_EN; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + + /* Power up the laser */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); + + /* flush the write to verify completion */ + E1000_WRITE_FLUSH(hw); + msec_delay(1); +} + +/** + * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Using the physical coding sub-layer (PCS), retrieve the current speed and + * duplex, then store the values in the pointers provided. + **/ +static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, + u16 *speed, u16 *duplex) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 pcs; + u32 status; + + DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575"); + + /* + * Read the PCS Status register for link state. For non-copper mode, + * the status register is not accurate. The PCS status register is + * used instead. + */ + pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT); + + /* + * The link up bit determines when link is up on autoneg. + */ + if (pcs & E1000_PCS_LSTS_LINK_OK) { + mac->serdes_has_link = true; + + /* Detect and store PCS speed */ + if (pcs & E1000_PCS_LSTS_SPEED_1000) + *speed = SPEED_1000; + else if (pcs & E1000_PCS_LSTS_SPEED_100) + *speed = SPEED_100; + else + *speed = SPEED_10; + + /* Detect and store PCS duplex */ + if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) + *duplex = FULL_DUPLEX; + else + *duplex = HALF_DUPLEX; + + /* Check if it is an I354 2.5Gb backplane connection. */ + if (mac->type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) { + *speed = SPEED_2500; + *duplex = FULL_DUPLEX; + DEBUGOUT("2500 Mbs, "); + DEBUGOUT("Full Duplex\n"); + } + } + + } else { + mac->serdes_has_link = false; + *speed = 0; + *duplex = 0; + } + + return E1000_SUCCESS; +} + +/** + * e1000_shutdown_serdes_link_82575 - Remove link during power down + * @hw: pointer to the HW structure + * + * In the case of serdes shut down sfp and PCS on driver unload + * when management pass thru is not enabled. + **/ +void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) +{ + u32 reg; + + DEBUGFUNC("e1000_shutdown_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return; + + if (!e1000_enable_mng_pass_thru(hw)) { + /* Disable PCS to turn off link */ + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg &= ~E1000_PCS_CFG_PCS_EN; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + + /* shutdown the laser */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg |= E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); + + /* flush the write to verify completion */ + E1000_WRITE_FLUSH(hw); + msec_delay(1); + } + + return; +} + +/** + * e1000_reset_hw_82575 - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. + **/ +static s32 e1000_reset_hw_82575(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + + DEBUGFUNC("e1000_reset_hw_82575"); + + /* + * Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = e1000_disable_pcie_master_generic(hw); + if (ret_val) + DEBUGOUT("PCI-E Master disable polling has failed.\n"); + + /* set the completion timeout for interface */ + ret_val = e1000_set_pcie_completion_timeout(hw); + if (ret_val) + DEBUGOUT("PCI-E Set completion timeout has failed.\n"); + + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + + E1000_WRITE_REG(hw, E1000_RCTL, 0); + E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); + E1000_WRITE_FLUSH(hw); + + msec_delay(10); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + DEBUGOUT("Issuing a global reset to MAC\n"); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); + + ret_val = e1000_get_auto_rd_done_generic(hw); + if (ret_val) { + /* + * When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + DEBUGOUT("Auto Read Done did not complete\n"); + } + + /* If EEPROM is not present, run manual init scripts */ + if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES)) + e1000_reset_init_script_82575(hw); + + /* Clear any pending interrupt events. */ + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_READ_REG(hw, E1000_ICR); + + /* Install any alternate MAC address into RAR0 */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + + return ret_val; +} + +/** + * e1000_init_hw_82575 - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. + **/ +static s32 e1000_init_hw_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + u16 i, rar_count = mac->rar_entry_count; + + DEBUGFUNC("e1000_init_hw_82575"); + + /* Initialize identification LED */ + ret_val = mac->ops.id_led_init(hw); + if (ret_val) { + DEBUGOUT("Error initializing identification LED\n"); + /* This is not fatal and we should not stop init due to this */ + } + + /* Disabling VLAN filtering */ + DEBUGOUT("Initializing the IEEE VLAN\n"); + mac->ops.clear_vfta(hw); + + /* Setup the receive address */ + e1000_init_rx_addrs_generic(hw, rar_count); + + /* Zero out the Multicast HASH table */ + DEBUGOUT("Zeroing the MTA\n"); + for (i = 0; i < mac->mta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); + + /* Zero out the Unicast HASH table */ + DEBUGOUT("Zeroing the UTA\n"); + for (i = 0; i < mac->uta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0); + + /* Setup link and flow control */ + ret_val = mac->ops.setup_link(hw); + + /* Set the default MTU size */ + hw->dev_spec._82575.mtu = 1500; + + /* + * Clear all of the statistics registers (clear on read). It is + * important that we do this after we have tried to establish link + * because the symbol error count will increment wildly if there + * is no link. + */ + e1000_clear_hw_cntrs_82575(hw); + + return ret_val; +} + +/** + * e1000_setup_copper_link_82575 - Configure copper link settings + * @hw: pointer to the HW structure + * + * Configures the link for auto-neg or forced speed and duplex. Then we check + * for link, once link is established calls to configure collision distance + * and flow control are called. + **/ +static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + u32 phpm_reg; + + DEBUGFUNC("e1000_setup_copper_link_82575"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_SLU; + ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Clear Go Link Disconnect bit on supported devices */ + switch (hw->mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i210: + case e1000_i211: + phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + phpm_reg &= ~E1000_82580_PM_GO_LINKD; + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg); + break; + default: + break; + } + + ret_val = e1000_setup_serdes_link_82575(hw); + if (ret_val) + goto out; + + if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) { + /* allow time for SFP cage time to power up phy */ + msec_delay(300); + + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + goto out; + } + } + switch (hw->phy.type) { + case e1000_phy_i210: + case e1000_phy_m88: + switch (hw->phy.id) { + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1543_E_PHY_ID: + case I210_I_PHY_ID: + ret_val = e1000_copper_link_setup_m88_gen2(hw); + break; + default: + ret_val = e1000_copper_link_setup_m88(hw); + break; + } + break; + case e1000_phy_igp_3: + ret_val = e1000_copper_link_setup_igp(hw); + break; + case e1000_phy_82580: + ret_val = e1000_copper_link_setup_82577(hw); + break; + default: + ret_val = -E1000_ERR_PHY; + break; + } + + if (ret_val) + goto out; + + ret_val = e1000_setup_copper_link_generic(hw); +out: + return ret_val; +} + +/** + * e1000_setup_serdes_link_82575 - Setup link for serdes + * @hw: pointer to the HW structure + * + * Configure the physical coding sub-layer (PCS) link. The PCS link is + * used on copper connections where the serialized gigabit media independent + * interface (sgmii), or serdes fiber is being used. Configures the link + * for auto-negotiation or forces speed/duplex. + **/ +static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw) +{ + u32 ctrl_ext, ctrl_reg, reg, anadv_reg; + bool pcs_autoneg; + s32 ret_val = E1000_SUCCESS; + u16 data; + + DEBUGFUNC("e1000_setup_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return ret_val; + + /* + * On the 82575, SerDes loopback mode persists until it is + * explicitly turned off or a power cycle is performed. A read to + * the register does not indicate its status. Therefore, we ensure + * loopback mode is disabled during initialization. + */ + E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); + + /* power on the sfp cage if present */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + + ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); + ctrl_reg |= E1000_CTRL_SLU; + + /* set both sw defined pins on 82575/82576*/ + if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) + ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; + + reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + + /* default pcs_autoneg to the same setting as mac autoneg */ + pcs_autoneg = hw->mac.autoneg; + + switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { + case E1000_CTRL_EXT_LINK_MODE_SGMII: + /* sgmii mode lets the phy handle forcing speed/duplex */ + pcs_autoneg = true; + /* autoneg time out should be disabled for SGMII mode */ + reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); + break; + case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: + /* disable PCS autoneg and support parallel detect only */ + pcs_autoneg = false; + /* fall through to default case */ + default: + if (hw->mac.type == e1000_82575 || + hw->mac.type == e1000_82576) { + ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) + pcs_autoneg = false; + } + + /* + * non-SGMII modes only supports a speed of 1000/Full for the + * link so it is best to just force the MAC and let the pcs + * link either autoneg or be forced to 1000/Full + */ + ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | + E1000_CTRL_FD | E1000_CTRL_FRCDPX; + + /* set speed of 1000/Full if speed/duplex is forced */ + reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; + break; + } + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); + + /* + * New SerDes mode allows for forcing speed or autonegotiating speed + * at 1gb. Autoneg should be default set by most drivers. This is the + * mode that will be compatible with older link partners and switches. + * However, both are supported by the hardware and some drivers/tools. + */ + reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | + E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); + + if (pcs_autoneg) { + /* Set PCS register for autoneg */ + reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ + E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ + + /* Disable force flow control for autoneg */ + reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; + + /* Configure flow control advertisement for autoneg */ + anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); + anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); + + switch (hw->fc.requested_mode) { + case e1000_fc_full: + case e1000_fc_rx_pause: + anadv_reg |= E1000_TXCW_ASM_DIR; + anadv_reg |= E1000_TXCW_PAUSE; + break; + case e1000_fc_tx_pause: + anadv_reg |= E1000_TXCW_ASM_DIR; + break; + default: + break; + } + + E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg); + + DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); + } else { + /* Set PCS register for forced link */ + reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ + + /* Force flow control for forced link */ + reg |= E1000_PCS_LCTL_FORCE_FCTRL; + + DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); + } + + E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); + + if (!pcs_autoneg && !e1000_sgmii_active_82575(hw)) + e1000_force_mac_fc_generic(hw); + + return ret_val; +} + +/** + * e1000_get_media_type_82575 - derives current media type. + * @hw: pointer to the HW structure + * + * The media type is chosen reflecting few settings. + * The following are taken into account: + * - link mode set in the current port Init Control Word #3 + * - current link mode settings in CSR register + * - MDIO vs. I2C PHY control interface chosen + * - SFP module media type + **/ +static s32 e1000_get_media_type_82575(struct e1000_hw *hw) +{ + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext = 0; + u32 link_mode = 0; + + /* Set internal phy as default */ + dev_spec->sgmii_active = false; + dev_spec->module_plugged = false; + + /* Get CSR setting */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + + /* extract link mode setting */ + link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; + + switch (link_mode) { + case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: + hw->phy.media_type = e1000_media_type_internal_serdes; + break; + case E1000_CTRL_EXT_LINK_MODE_GMII: + hw->phy.media_type = e1000_media_type_copper; + break; + case E1000_CTRL_EXT_LINK_MODE_SGMII: + /* Get phy control interface type set (MDIO vs. I2C)*/ + if (e1000_sgmii_uses_mdio_82575(hw)) { + hw->phy.media_type = e1000_media_type_copper; + dev_spec->sgmii_active = true; + break; + } + /* fall through for I2C based SGMII */ + case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: + /* read media type from SFP EEPROM */ + ret_val = e1000_set_sfp_media_type_82575(hw); + if ((ret_val != E1000_SUCCESS) || + (hw->phy.media_type == e1000_media_type_unknown)) { + /* + * If media type was not identified then return media + * type defined by the CTRL_EXT settings. + */ + hw->phy.media_type = e1000_media_type_internal_serdes; + + if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { + hw->phy.media_type = e1000_media_type_copper; + dev_spec->sgmii_active = true; + } + + break; + } + + /* do not change link mode for 100BaseFX */ + if (dev_spec->eth_flags.e100_base_fx) + break; + + /* change current link mode setting */ + ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; + + if (hw->phy.media_type == e1000_media_type_copper) + ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; + else + ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + + break; + } + + return ret_val; +} + +/** + * e1000_set_sfp_media_type_82575 - derives SFP module media type. + * @hw: pointer to the HW structure + * + * The media type is chosen based on SFP module. + * compatibility flags retrieved from SFP ID EEPROM. + **/ +static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_ERR_CONFIG; + u32 ctrl_ext = 0; + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags; + u8 tranceiver_type = 0; + s32 timeout = 3; + + /* Turn I2C interface ON and power on sfp cage */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); + + E1000_WRITE_FLUSH(hw); + + /* Read SFP module data */ + while (timeout) { + ret_val = e1000_read_sfp_data_byte(hw, + E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), + &tranceiver_type); + if (ret_val == E1000_SUCCESS) + break; + msec_delay(100); + timeout--; + } + if (ret_val != E1000_SUCCESS) + goto out; + + ret_val = e1000_read_sfp_data_byte(hw, + E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), + (u8 *)eth_flags); + if (ret_val != E1000_SUCCESS) + goto out; + + /* Check if there is some SFP module plugged and powered */ + if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || + (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { + dev_spec->module_plugged = true; + if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { + hw->phy.media_type = e1000_media_type_internal_serdes; + } else if (eth_flags->e100_base_fx) { + dev_spec->sgmii_active = true; + hw->phy.media_type = e1000_media_type_internal_serdes; + } else if (eth_flags->e1000_base_t) { + dev_spec->sgmii_active = true; + hw->phy.media_type = e1000_media_type_copper; + } else { + hw->phy.media_type = e1000_media_type_unknown; + DEBUGOUT("PHY module has not been recognized\n"); + goto out; + } + } else { + hw->phy.media_type = e1000_media_type_unknown; + } + ret_val = E1000_SUCCESS; +out: + /* Restore I2C interface setting */ + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + return ret_val; +} + +/** + * e1000_valid_led_default_82575 - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_82575"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { + switch (hw->phy.media_type) { + case e1000_media_type_internal_serdes: + *data = ID_LED_DEFAULT_82575_SERDES; + break; + case e1000_media_type_copper: + default: + *data = ID_LED_DEFAULT; + break; + } + } +out: + return ret_val; +} + +/** + * e1000_sgmii_active_82575 - Return sgmii state + * @hw: pointer to the HW structure + * + * 82575 silicon has a serialized gigabit media independent interface (sgmii) + * which can be enabled for use in the embedded applications. Simply + * return the current state of the sgmii interface. + **/ +static bool e1000_sgmii_active_82575(struct e1000_hw *hw) +{ + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + return dev_spec->sgmii_active; +} + +/** + * e1000_reset_init_script_82575 - Inits HW defaults after reset + * @hw: pointer to the HW structure + * + * Inits recommended HW defaults after a reset when there is no EEPROM + * detected. This is only for the 82575. + **/ +static s32 e1000_reset_init_script_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_reset_init_script_82575"); + + if (hw->mac.type == e1000_82575) { + DEBUGOUT("Running reset init script for 82575\n"); + /* SerDes configuration via SERDESCTRL */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15); + + /* CCM configuration via CCMCTL register */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00); + + /* PCIe lanes configuration */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81); + + /* PCIe PLL Configuration */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00); + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_mac_addr_82575 - Read device MAC address + * @hw: pointer to the HW structure + **/ +static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_mac_addr_82575"); + + /* + * If there's an alternate MAC address place it in RAR0 + * so that it will override the Si installed default perm + * address. + */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_mac_addr_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_config_collision_dist_82575 - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +static void e1000_config_collision_dist_82575(struct e1000_hw *hw) +{ + u32 tctl_ext; + + DEBUGFUNC("e1000_config_collision_dist_82575"); + + tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT); + + tctl_ext &= ~E1000_TCTL_EXT_COLD; + tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT; + + E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_power_down_phy_copper_82575 - Remove link during PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, remove the link. + **/ +static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + + if (!(phy->ops.check_reset_block)) + return; + + /* If the management interface is not enabled, then power down */ + if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) + e1000_power_down_phy_copper(hw); + + return; +} + +/** + * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters + * @hw: pointer to the HW structure + * + * Clears the hardware counters by reading the counter registers. + **/ +static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_clear_hw_cntrs_82575"); + + e1000_clear_hw_cntrs_base_generic(hw); + + E1000_READ_REG(hw, E1000_PRC64); + E1000_READ_REG(hw, E1000_PRC127); + E1000_READ_REG(hw, E1000_PRC255); + E1000_READ_REG(hw, E1000_PRC511); + E1000_READ_REG(hw, E1000_PRC1023); + E1000_READ_REG(hw, E1000_PRC1522); + E1000_READ_REG(hw, E1000_PTC64); + E1000_READ_REG(hw, E1000_PTC127); + E1000_READ_REG(hw, E1000_PTC255); + E1000_READ_REG(hw, E1000_PTC511); + E1000_READ_REG(hw, E1000_PTC1023); + E1000_READ_REG(hw, E1000_PTC1522); + + E1000_READ_REG(hw, E1000_ALGNERRC); + E1000_READ_REG(hw, E1000_RXERRC); + E1000_READ_REG(hw, E1000_TNCRS); + E1000_READ_REG(hw, E1000_CEXTERR); + E1000_READ_REG(hw, E1000_TSCTC); + E1000_READ_REG(hw, E1000_TSCTFC); + + E1000_READ_REG(hw, E1000_MGTPRC); + E1000_READ_REG(hw, E1000_MGTPDC); + E1000_READ_REG(hw, E1000_MGTPTC); + + E1000_READ_REG(hw, E1000_IAC); + E1000_READ_REG(hw, E1000_ICRXOC); + + E1000_READ_REG(hw, E1000_ICRXPTC); + E1000_READ_REG(hw, E1000_ICRXATC); + E1000_READ_REG(hw, E1000_ICTXPTC); + E1000_READ_REG(hw, E1000_ICTXATC); + E1000_READ_REG(hw, E1000_ICTXQEC); + E1000_READ_REG(hw, E1000_ICTXQMTC); + E1000_READ_REG(hw, E1000_ICRXDMTC); + + E1000_READ_REG(hw, E1000_CBTMPC); + E1000_READ_REG(hw, E1000_HTDPMC); + E1000_READ_REG(hw, E1000_CBRMPC); + E1000_READ_REG(hw, E1000_RPTHC); + E1000_READ_REG(hw, E1000_HGPTC); + E1000_READ_REG(hw, E1000_HTCBDPC); + E1000_READ_REG(hw, E1000_HGORCL); + E1000_READ_REG(hw, E1000_HGORCH); + E1000_READ_REG(hw, E1000_HGOTCL); + E1000_READ_REG(hw, E1000_HGOTCH); + E1000_READ_REG(hw, E1000_LENERRS); + + /* This register should not be read in copper configurations */ + if ((hw->phy.media_type == e1000_media_type_internal_serdes) || + e1000_sgmii_active_82575(hw)) + E1000_READ_REG(hw, E1000_SCVPC); +} + +/** + * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable + * @hw: pointer to the HW structure + * + * After rx enable if managability is enabled then there is likely some + * bad data at the start of the fifo and possibly in the DMA fifo. This + * function clears the fifos and flushes any packets that came in as rx was + * being enabled. + **/ +void e1000_rx_fifo_flush_82575(struct e1000_hw *hw) +{ + u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; + int i, ms_wait; + + DEBUGFUNC("e1000_rx_fifo_workaround_82575"); + if (hw->mac.type != e1000_82575 || + !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) + return; + + /* Disable all Rx queues */ + for (i = 0; i < 4; i++) { + rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); + E1000_WRITE_REG(hw, E1000_RXDCTL(i), + rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); + } + /* Poll all queues to verify they have shut down */ + for (ms_wait = 0; ms_wait < 10; ms_wait++) { + msec_delay(1); + rx_enabled = 0; + for (i = 0; i < 4; i++) + rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i)); + if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) + break; + } + + if (ms_wait == 10) + DEBUGOUT("Queue disable timed out after 10ms\n"); + + /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all + * incoming packets are rejected. Set enable and wait 2ms so that + * any packet that was coming in as RCTL.EN was set is flushed + */ + rfctl = E1000_READ_REG(hw, E1000_RFCTL); + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); + + rlpml = E1000_READ_REG(hw, E1000_RLPML); + E1000_WRITE_REG(hw, E1000_RLPML, 0); + + rctl = E1000_READ_REG(hw, E1000_RCTL); + temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); + temp_rctl |= E1000_RCTL_LPE; + + E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl); + E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN); + E1000_WRITE_FLUSH(hw); + msec_delay(2); + + /* Enable Rx queues that were previously enabled and restore our + * previous state + */ + for (i = 0; i < 4; i++) + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]); + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + E1000_WRITE_FLUSH(hw); + + E1000_WRITE_REG(hw, E1000_RLPML, rlpml); + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); + + /* Flush receive errors generated by workaround */ + E1000_READ_REG(hw, E1000_ROC); + E1000_READ_REG(hw, E1000_RNBC); + E1000_READ_REG(hw, E1000_MPC); +} + +/** + * e1000_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. To address this we need to + * increase the value to either 10ms to 200ms for capability version 1 config, + * or 16ms to 55ms for version 2. + **/ +static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw) +{ + u32 gcr = E1000_READ_REG(hw, E1000_GCR); + s32 ret_val = E1000_SUCCESS; + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & E1000_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 200ms through the GCR register + */ + if (!(gcr & E1000_GCR_CAP_VER2)) { + gcr |= E1000_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); + if (ret_val) + goto out; + + pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; + + ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; + + E1000_WRITE_REG(hw, E1000_GCR, gcr); + return ret_val; +} + +/** + * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * @pf: Physical Function pool - do not set anti-spoofing for the PF + * + * enables/disables L2 switch anti-spoofing functionality. + **/ +void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) +{ + u32 reg_val, reg_offset; + + switch (hw->mac.type) { + case e1000_82576: + reg_offset = E1000_DTXSWC; + break; + case e1000_i350: + case e1000_i354: + reg_offset = E1000_TXSWC; + break; + default: + return; + } + + reg_val = E1000_READ_REG(hw, reg_offset); + if (enable) { + reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + /* The PF can spoof - it has to in order to + * support emulation mode NICs + */ + reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); + } else { + reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + } + E1000_WRITE_REG(hw, reg_offset, reg_val); +} + +/** + * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * + * enables/disables L2 switch loopback functionality. + **/ +void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) +{ + u32 dtxswc; + + switch (hw->mac.type) { + case e1000_82576: + dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); + if (enable) + dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; + E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); + break; + case e1000_i350: + case e1000_i354: + dtxswc = E1000_READ_REG(hw, E1000_TXSWC); + if (enable) + dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; + E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc); + break; + default: + /* Currently no other hardware supports loopback */ + break; + } + + +} + +/** + * e1000_vmdq_set_replication_pf - enable or disable vmdq replication + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * + * enables/disables replication of packets across multiple pools. + **/ +void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) +{ + u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL); + + if (enable) + vt_ctl |= E1000_VT_CTL_VM_REPL_EN; + else + vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; + + E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl); +} + +/** + * e1000_read_phy_reg_82580 - Read 82580 MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + **/ +static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_read_phy_reg_82580"); + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_phy_reg_mdic(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_write_phy_reg_82580 - Write 82580 MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + **/ +static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_write_phy_reg_82580"); + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_write_phy_reg_mdic(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits + * @hw: pointer to the HW structure + * + * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on + * the values found in the EEPROM. This addresses an issue in which these + * bits are not restored from EEPROM after reset. + **/ +static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 mdicnfg; + u16 nvm_data = 0; + + DEBUGFUNC("e1000_reset_mdicnfg_82580"); + + if (hw->mac.type != e1000_82580) + goto out; + if (!e1000_sgmii_active_82575(hw)) + goto out; + + ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); + if (nvm_data & NVM_WORD24_EXT_MDIO) + mdicnfg |= E1000_MDICNFG_EXT_MDIO; + if (nvm_data & NVM_WORD24_COM_MDIO) + mdicnfg |= E1000_MDICNFG_COM_MDIO; + E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); +out: + return ret_val; +} + +/** + * e1000_reset_hw_82580 - Reset hardware + * @hw: pointer to the HW structure + * + * This resets function or entire device (all ports, etc.) + * to a known state. + **/ +static s32 e1000_reset_hw_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + /* BH SW mailbox bit in SW_FW_SYNC */ + u16 swmbsw_mask = E1000_SW_SYNCH_MB; + u32 ctrl; + bool global_device_reset = hw->dev_spec._82575.global_device_reset; + + DEBUGFUNC("e1000_reset_hw_82580"); + + hw->dev_spec._82575.global_device_reset = false; + + /* 82580 does not reliably do global_device_reset due to hw errata */ + if (hw->mac.type == e1000_82580) + global_device_reset = false; + + /* Get current control state. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* + * Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = e1000_disable_pcie_master_generic(hw); + if (ret_val) + DEBUGOUT("PCI-E Master disable polling has failed.\n"); + + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_WRITE_REG(hw, E1000_RCTL, 0); + E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); + E1000_WRITE_FLUSH(hw); + + msec_delay(10); + + /* Determine whether or not a global dev reset is requested */ + if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw, + swmbsw_mask)) + global_device_reset = false; + + if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STAT_DEV_RST_SET)) + ctrl |= E1000_CTRL_DEV_RST; + else + ctrl |= E1000_CTRL_RST; + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + /* Add delay to insure DEV_RST has time to complete */ + if (global_device_reset) + msec_delay(5); + + ret_val = e1000_get_auto_rd_done_generic(hw); + if (ret_val) { + /* + * When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + DEBUGOUT("Auto Read Done did not complete\n"); + } + + /* clear global device reset status bit */ + E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET); + + /* Clear any pending interrupt events. */ + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_READ_REG(hw, E1000_ICR); + + ret_val = e1000_reset_mdicnfg_82580(hw); + if (ret_val) + DEBUGOUT("Could not reset MDICNFG based on EEPROM\n"); + + /* Install any alternate MAC address into RAR0 */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + + /* Release semaphore */ + if (global_device_reset) + hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); + + return ret_val; +} + +/** + * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size + * @data: data received by reading RXPBS register + * + * The 82580 uses a table based approach for packet buffer allocation sizes. + * This function converts the retrieved value into the correct table value + * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 + * 0x0 36 72 144 1 2 4 8 16 + * 0x8 35 70 140 rsv rsv rsv rsv rsv + */ +u16 e1000_rxpbs_adjust_82580(u32 data) +{ + u16 ret_val = 0; + + if (data < E1000_82580_RXPBS_TABLE_SIZE) + ret_val = e1000_82580_rxpbs_table[data]; + + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_with_offset - Validate EEPROM + * checksum + * @hw: pointer to the HW structure + * @offset: offset in words of the checksum protected region + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) +{ + s32 ret_val = E1000_SUCCESS; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_validate_nvm_checksum_with_offset"); + + for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + checksum += nvm_data; + } + + if (checksum != (u16) NVM_SUM) { + DEBUGOUT("NVM Checksum Invalid\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_with_offset - Update EEPROM + * checksum + * @hw: pointer to the HW structure + * @offset: offset in words of the checksum protected region + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. + **/ +s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum_with_offset"); + + for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, + &checksum); + if (ret_val) + DEBUGOUT("NVM Write Error while updating checksum.\n"); + +out: + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM section checksum by reading/adding each word of + * the EEPROM and then verifies that the sum of the EEPROM is + * equal to 0xBABA. + **/ +static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 eeprom_regions_count = 1; + u16 j, nvm_data; + u16 nvm_offset; + + DEBUGFUNC("e1000_validate_nvm_checksum_82580"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { + /* if chekcsums compatibility bit is set validate checksums + * for all 4 ports. */ + eeprom_regions_count = 4; + } + + for (j = 0; j < eeprom_regions_count; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_validate_nvm_checksum_with_offset(hw, + nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_82580 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM section checksums for all 4 ports by reading/adding + * each word of the EEPROM up to the checksum. Then calculates the EEPROM + * checksum and writes the value to the EEPROM. + **/ +static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw) +{ + s32 ret_val; + u16 j, nvm_data; + u16 nvm_offset; + + DEBUGFUNC("e1000_update_nvm_checksum_82580"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n"); + goto out; + } + + if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) { + /* set compatibility bit to validate checksums appropriately */ + nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; + ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, + &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n"); + goto out; + } + } + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); + if (ret_val) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM section checksum by reading/adding each word of + * the EEPROM and then verifies that the sum of the EEPROM is + * equal to 0xBABA. + **/ +static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 j; + u16 nvm_offset; + + DEBUGFUNC("e1000_validate_nvm_checksum_i350"); + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_validate_nvm_checksum_with_offset(hw, + nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_i350 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM section checksums for all 4 ports by reading/adding + * each word of the EEPROM up to the checksum. Then calculates the EEPROM + * checksum and writes the value to the EEPROM. + **/ +static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 j; + u16 nvm_offset; + + DEBUGFUNC("e1000_update_nvm_checksum_i350"); + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * __e1000_access_emi_reg - Read/write EMI register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: pointer to value to read/write from/to the EMI address + * @read: boolean flag to indicate read or write + **/ +static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address, + u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_access_emi_reg"); + + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); + + return ret_val; +} + +/** + * e1000_read_emi_reg - Read Extended Management Interface register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: value to be read from the EMI address + **/ +s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) +{ + DEBUGFUNC("e1000_read_emi_reg"); + + return __e1000_access_emi_reg(hw, addr, data, true); +} + +/** + * e1000_set_eee_i350 - Enable/disable EEE support + * @hw: pointer to the HW structure + * + * Enable/disable EEE based on setting in dev_spec structure. + * + **/ +s32 e1000_set_eee_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 ipcnfg, eeer; + + DEBUGFUNC("e1000_set_eee_i350"); + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + goto out; + ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG); + eeer = E1000_READ_REG(hw, E1000_EEER); + + /* enable or disable per user setting */ + if (!(hw->dev_spec._82575.eee_disable)) { + u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU); + + ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); + eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | + E1000_EEER_LPI_FC); + + /* This bit should not be set in normal operation. */ + if (eee_su & E1000_EEE_SU_LPI_CLK_STP) + DEBUGOUT("LPI Clock Stop Bit should not be set!\n"); + } else { + ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); + eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | + E1000_EEER_LPI_FC); + } + E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg); + E1000_WRITE_REG(hw, E1000_EEER, eeer); + E1000_READ_REG(hw, E1000_IPCNFG); + E1000_READ_REG(hw, E1000_EEER); +out: + + return ret_val; +} + +/** + * e1000_set_eee_i354 - Enable/disable EEE support + * @hw: pointer to the HW structure + * + * Enable/disable EEE legacy mode based on setting in dev_spec structure. + * + **/ +s32 e1000_set_eee_i354(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_data; + + DEBUGFUNC("e1000_set_eee_i354"); + + if ((hw->phy.media_type != e1000_media_type_copper) || + ((phy->id != M88E1543_E_PHY_ID))) + goto out; + + if (!hw->dev_spec._82575.eee_disable) { + /* Switch to PHY page 18. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); + if (ret_val) + goto out; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, + phy_data); + if (ret_val) + goto out; + + /* Return the PHY to page 0. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); + if (ret_val) + goto out; + + /* Turn on EEE advertisement. */ + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED; + ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } else { + /* Turn off EEE advertisement. */ + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED); + ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } + +out: + return ret_val; +} + +/** + * e1000_get_eee_status_i354 - Get EEE status + * @hw: pointer to the HW structure + * @status: EEE status + * + * Get EEE status by guessing based on whether Tx or Rx LPI indications have + * been received. + **/ +s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_data; + + DEBUGFUNC("e1000_get_eee_status_i354"); + + /* Check if EEE is supported on this device. */ + if ((hw->phy.media_type != e1000_media_type_copper) || + ((phy->id != M88E1543_E_PHY_ID))) + goto out; + + ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, + E1000_PCS_STATUS_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | + E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false; + +out: + return ret_val; +} + +/* Due to a hw errata, if the host tries to configure the VFTA register + * while performing queries from the BMC or DMA, then the VFTA in some + * cases won't be written. + */ + +/** + * e1000_clear_vfta_i350 - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * Clears the register array which contains the VLAN filter table by + * setting all the values to 0. + **/ +void e1000_clear_vfta_i350(struct e1000_hw *hw) +{ + u32 offset; + int i; + + DEBUGFUNC("e1000_clear_vfta_350"); + + for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); + + E1000_WRITE_FLUSH(hw); + } +} + +/** + * e1000_write_vfta_i350 - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +{ + int i; + + DEBUGFUNC("e1000_write_vfta_350"); + + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); + + E1000_WRITE_FLUSH(hw); +} + + +/** + * e1000_set_i2c_bb - Enable I2C bit-bang + * @hw: pointer to the HW structure + * + * Enable I2C bit-bang interface + * + **/ +s32 e1000_set_i2c_bb(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext, i2cparams; + + DEBUGFUNC("e1000_set_i2c_bb"); + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_I2C_ENA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); + + i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS); + i2cparams |= E1000_I2CBB_EN; + i2cparams |= E1000_I2C_DATA_OE_N; + i2cparams |= E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams); + E1000_WRITE_FLUSH(hw); + + return ret_val; +} + +/** + * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @dev_addr: device address + * @data: value read + * + * Performs byte read operation over I2C interface at + * a specified device address. + **/ +s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + s32 status = E1000_SUCCESS; + u32 max_retry = 10; + u32 retry = 1; + u16 swfw_mask = 0; + + bool nack = true; + + DEBUGFUNC("e1000_read_i2c_byte_generic"); + + swfw_mask = E1000_SWFW_PHY0_SM; + + do { + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != E1000_SUCCESS) { + status = E1000_ERR_SWFW_SYNC; + goto read_byte_out; + } + + e1000_i2c_start(hw); + + /* Device Address and write indication */ + status = e1000_clock_out_i2c_byte(hw, dev_addr); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, byte_offset); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_start(hw); + + /* Device Address and read indication */ + status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1)); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_in_i2c_byte(hw, data); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_bit(hw, nack); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_stop(hw); + break; + +fail: + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + msec_delay(100); + e1000_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + DEBUGOUT("I2C byte read error - Retrying.\n"); + else + DEBUGOUT("I2C byte read error.\n"); + + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +read_byte_out: + + return status; +} + +/** + * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: value to write + * + * Performs byte write operation over I2C interface at + * a specified device address. + **/ +s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + s32 status = E1000_SUCCESS; + u32 max_retry = 1; + u32 retry = 0; + u16 swfw_mask = 0; + + DEBUGFUNC("e1000_write_i2c_byte_generic"); + + swfw_mask = E1000_SWFW_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { + status = E1000_ERR_SWFW_SYNC; + goto write_byte_out; + } + + do { + e1000_i2c_start(hw); + + status = e1000_clock_out_i2c_byte(hw, dev_addr); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, byte_offset); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, data); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_stop(hw); + break; + +fail: + e1000_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + DEBUGOUT("I2C byte write error - Retrying.\n"); + else + DEBUGOUT("I2C byte write error.\n"); + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +write_byte_out: + + return status; +} + +/** + * e1000_i2c_start - Sets I2C start condition + * @hw: pointer to hardware structure + * + * Sets I2C start condition (High -> Low on SDA while SCL is High) + **/ +static void e1000_i2c_start(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_i2c_start"); + + /* Start condition must begin with data and clock high */ + e1000_set_i2c_data(hw, &i2cctl, 1); + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for start condition (4.7us) */ + usec_delay(E1000_I2C_T_SU_STA); + + e1000_set_i2c_data(hw, &i2cctl, 0); + + /* Hold time for start condition (4us) */ + usec_delay(E1000_I2C_T_HD_STA); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + +} + +/** + * e1000_i2c_stop - Sets I2C stop condition + * @hw: pointer to hardware structure + * + * Sets I2C stop condition (Low -> High on SDA while SCL is High) + **/ +static void e1000_i2c_stop(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_i2c_stop"); + + /* Stop condition must begin with data low and clock high */ + e1000_set_i2c_data(hw, &i2cctl, 0); + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for stop condition (4us) */ + usec_delay(E1000_I2C_T_SU_STO); + + e1000_set_i2c_data(hw, &i2cctl, 1); + + /* bus free time between stop and start (4.7us)*/ + usec_delay(E1000_I2C_T_BUF); +} + +/** + * e1000_clock_in_i2c_byte - Clocks in one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte to clock in + * + * Clocks in one byte data via I2C data/clock + **/ +static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data) +{ + s32 i; + bool bit = 0; + + DEBUGFUNC("e1000_clock_in_i2c_byte"); + + *data = 0; + for (i = 7; i >= 0; i--) { + e1000_clock_in_i2c_bit(hw, &bit); + *data |= bit << i; + } + + return E1000_SUCCESS; +} + +/** + * e1000_clock_out_i2c_byte - Clocks out one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte clocked out + * + * Clocks out one byte data via I2C data/clock + **/ +static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data) +{ + s32 status = E1000_SUCCESS; + s32 i; + u32 i2cctl; + bool bit = 0; + + DEBUGFUNC("e1000_clock_out_i2c_byte"); + + for (i = 7; i >= 0; i--) { + bit = (data >> i) & 0x1; + status = e1000_clock_out_i2c_bit(hw, bit); + + if (status != E1000_SUCCESS) + break; + } + + /* Release SDA line (set high) */ + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + i2cctl |= E1000_I2C_DATA_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); + + return status; +} + +/** + * e1000_get_i2c_ack - Polls for I2C ACK + * @hw: pointer to hardware structure + * + * Clocks in/out one bit via I2C data/clock + **/ +static s32 e1000_get_i2c_ack(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u32 i = 0; + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + u32 timeout = 10; + bool ack = true; + + DEBUGFUNC("e1000_get_i2c_ack"); + + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + /* Wait until SCL returns high */ + for (i = 0; i < timeout; i++) { + usec_delay(1); + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + if (i2cctl & E1000_I2C_CLK_IN) + break; + } + if (!(i2cctl & E1000_I2C_CLK_IN)) + return E1000_ERR_I2C; + + ack = e1000_get_i2c_data(&i2cctl); + if (ack) { + DEBUGOUT("I2C ack was not received.\n"); + status = E1000_ERR_I2C; + } + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + + return status; +} + +/** + * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: read data value + * + * Clocks in one bit via I2C data/clock + **/ +static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_clock_in_i2c_bit"); + + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + *data = e1000_get_i2c_data(&i2cctl); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + + return E1000_SUCCESS; +} + +/** + * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: data value to write + * + * Clocks out one bit via I2C data/clock + **/ +static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data) +{ + s32 status; + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_clock_out_i2c_bit"); + + status = e1000_set_i2c_data(hw, &i2cctl, data); + if (status == E1000_SUCCESS) { + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us. + * This also takes care of the data hold time. + */ + usec_delay(E1000_I2C_T_LOW); + } else { + status = E1000_ERR_I2C; + DEBUGOUT1("I2C data was not set to %X\n", data); + } + + return status; +} +/** + * e1000_raise_i2c_clk - Raises the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Raises the I2C clock line '0'->'1' + **/ +static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) +{ + DEBUGFUNC("e1000_raise_i2c_clk"); + + *i2cctl |= E1000_I2C_CLK_OUT; + *i2cctl &= ~E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* SCL rise time (1000ns) */ + usec_delay(E1000_I2C_T_RISE); +} + +/** + * e1000_lower_i2c_clk - Lowers the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Lowers the I2C clock line '1'->'0' + **/ +static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) +{ + + DEBUGFUNC("e1000_lower_i2c_clk"); + + *i2cctl &= ~E1000_I2C_CLK_OUT; + *i2cctl &= ~E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* SCL fall time (300ns) */ + usec_delay(E1000_I2C_T_FALL); +} + +/** + * e1000_set_i2c_data - Sets the I2C data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * @data: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + **/ +static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data) +{ + s32 status = E1000_SUCCESS; + + DEBUGFUNC("e1000_set_i2c_data"); + + if (data) + *i2cctl |= E1000_I2C_DATA_OUT; + else + *i2cctl &= ~E1000_I2C_DATA_OUT; + + *i2cctl &= ~E1000_I2C_DATA_OE_N; + *i2cctl |= E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ + usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA); + + *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + if (data != e1000_get_i2c_data(i2cctl)) { + status = E1000_ERR_I2C; + DEBUGOUT1("Error - I2C data was not set to %X.\n", data); + } + + return status; +} + +/** + * e1000_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + **/ +static bool e1000_get_i2c_data(u32 *i2cctl) +{ + bool data; + + DEBUGFUNC("e1000_get_i2c_data"); + + if (*i2cctl & E1000_I2C_DATA_IN) + data = 1; + else + data = 0; + + return data; +} + +/** + * e1000_i2c_bus_clear - Clears the I2C bus + * @hw: pointer to hardware structure + * + * Clears the I2C bus by sending nine clock pulses. + * Used when data line is stuck low. + **/ +void e1000_i2c_bus_clear(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + u32 i; + + DEBUGFUNC("e1000_i2c_bus_clear"); + + e1000_i2c_start(hw); + + e1000_set_i2c_data(hw, &i2cctl, 1); + + for (i = 0; i < 9; i++) { + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Min high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Min low period of clock is 4.7us*/ + usec_delay(E1000_I2C_T_LOW); + } + + e1000_i2c_start(hw); + + /* Put the i2c bus back to default state */ + e1000_i2c_stop(hw); +} + +static const u8 e1000_emc_temp_data[4] = { + E1000_EMC_INTERNAL_DATA, + E1000_EMC_DIODE1_DATA, + E1000_EMC_DIODE2_DATA, + E1000_EMC_DIODE3_DATA +}; +static const u8 e1000_emc_therm_limit[4] = { + E1000_EMC_INTERNAL_THERM_LIMIT, + E1000_EMC_DIODE1_THERM_LIMIT, + E1000_EMC_DIODE2_THERM_LIMIT, + E1000_EMC_DIODE3_THERM_LIMIT +}; + +/** + * e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 i; + struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + DEBUGFUNC("e1000_get_thermal_sensor_data_generic"); + + if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) + return E1000_NOT_IMPLEMENTED; + + data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF); + + /* Return the internal sensor only if ETS is unsupported */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return status; + + e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); + if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + if (num_sensors > E1000_MAX_SENSORS) + num_sensors = E1000_MAX_SENSORS; + + for (i = 1; i < num_sensors; i++) { + e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); + sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> + NVM_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> + NVM_ETS_DATA_LOC_SHIFT); + + if (sensor_location != 0) + hw->phy.ops.read_i2c_byte(hw, + e1000_emc_temp_data[sensor_index], + E1000_I2C_THERMAL_SENSOR_ADDR, + &data->sensor[i].temp); + } + return status; +} + +/** + * e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Sets the thermal sensor thresholds according to the NVM map + * and save off the threshold and location values into mac.thermal_sensor_data + **/ +s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 low_thresh_delta; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 therm_limit; + u8 i; + struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic"); + + if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) + return E1000_NOT_IMPLEMENTED; + + memset(data, 0, sizeof(struct e1000_thermal_sensor_data)); + + data->sensor[0].location = 0x1; + data->sensor[0].caution_thresh = + (E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF); + data->sensor[0].max_op_thresh = + (E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF); + + /* Return the internal sensor only if ETS is unsupported */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return status; + + e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); + if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + + low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> + NVM_ETS_LTHRES_DELTA_SHIFT); + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + + for (i = 1; i <= num_sensors; i++) { + e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); + sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> + NVM_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> + NVM_ETS_DATA_LOC_SHIFT); + therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, + e1000_emc_therm_limit[sensor_index], + E1000_I2C_THERMAL_SENSOR_ADDR, + therm_limit); + + if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) { + data->sensor[i].location = sensor_location; + data->sensor[i].caution_thresh = therm_limit; + data->sensor[i].max_op_thresh = therm_limit - + low_thresh_delta; + } + } + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h new file mode 100644 index 00000000..1aec75ab --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h @@ -0,0 +1,509 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_82575_H_ +#define _E1000_82575_H_ + +#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ + (ID_LED_DEF1_DEF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_ON2)) +/* + * Receive Address Register Count + * Number of high/low register pairs in the RAR. The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * These entries are also used for MAC-based filtering. + */ +/* + * For 82576, there are an additional set of RARs that begin at an offset + * separate from the first set of RARs. + */ +#define E1000_RAR_ENTRIES_82575 16 +#define E1000_RAR_ENTRIES_82576 24 +#define E1000_RAR_ENTRIES_82580 24 +#define E1000_RAR_ENTRIES_I350 32 +#define E1000_SW_SYNCH_MB 0x00000100 +#define E1000_STAT_DEV_RST_SET 0x00100000 +#define E1000_CTRL_DEV_RST 0x20000000 + +struct e1000_adv_data_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + u32 data; + struct { + u32 datalen:16; /* Data buffer length */ + u32 rsvd:4; + u32 dtyp:4; /* Descriptor type */ + u32 dcmd:8; /* Descriptor command */ + } config; + } lower; + union { + u32 data; + struct { + u32 status:4; /* Descriptor status */ + u32 idx:4; + u32 popts:6; /* Packet Options */ + u32 paylen:18; /* Payload length */ + } options; + } upper; +}; + +#define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */ +#define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */ +#define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */ +#define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */ +#define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */ +#define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */ +#define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */ +#define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */ +#define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */ +#define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADV_DCMD_RS 0x8 /* Report Status */ +#define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */ +#define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */ +/* Extended Device Control */ +#define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */ + +struct e1000_adv_context_desc { + union { + u32 ip_config; + struct { + u32 iplen:9; + u32 maclen:7; + u32 vlan_tag:16; + } fields; + } ip_setup; + u32 seq_num; + union { + u64 l4_config; + struct { + u32 mkrloc:9; + u32 tucmd:11; + u32 dtyp:4; + u32 adv:8; + u32 rsvd:4; + u32 idx:4; + u32 l4len:8; + u32 mss:16; + } fields; + } l4_setup; +}; + +/* SRRCTL bit definitions */ +#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ +#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 +#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ +#define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000 +#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 +#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000 +#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 +#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 +#define E1000_SRRCTL_TIMESTAMP 0x40000000 +#define E1000_SRRCTL_DROP_EN 0x80000000 + +#define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F +#define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00 + +#define E1000_TX_HEAD_WB_ENABLE 0x1 +#define E1000_TX_SEQNUM_WB_ENABLE 0x2 + +#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 +#define E1000_MRQC_ENABLE_VMDQ 0x00000003 +#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 +#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 +#define E1000_MRQC_ENABLE_RSS_8Q 0x00000002 + +#define E1000_VMRCTL_MIRROR_PORT_SHIFT 8 +#define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \ + E1000_VMRCTL_MIRROR_PORT_SHIFT) +#define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0) +#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1) +#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2) + +#define E1000_EICR_TX_QUEUE ( \ + E1000_EICR_TX_QUEUE0 | \ + E1000_EICR_TX_QUEUE1 | \ + E1000_EICR_TX_QUEUE2 | \ + E1000_EICR_TX_QUEUE3) + +#define E1000_EICR_RX_QUEUE ( \ + E1000_EICR_RX_QUEUE0 | \ + E1000_EICR_RX_QUEUE1 | \ + E1000_EICR_RX_QUEUE2 | \ + E1000_EICR_RX_QUEUE3) + +#define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE +#define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE + +#define EIMS_ENABLE_MASK ( \ + E1000_EIMS_RX_QUEUE | \ + E1000_EIMS_TX_QUEUE | \ + E1000_EIMS_TCP_TIMER | \ + E1000_EIMS_OTHER) + +/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ +#define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ +#define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ +#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ +#define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ +#define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ +#define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ +#define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ +#define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ +#define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ +#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ + +/* Receive Descriptor - Advanced */ +union e1000_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /*RSS type, Pkt type*/ + /* Split Header, header buffer len */ + __le16 hdr_info; + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F +#define E1000_RXDADV_RSSTYPE_SHIFT 12 +#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 +#define E1000_RXDADV_HDRBUFLEN_SHIFT 5 +#define E1000_RXDADV_SPLITHEADER_EN 0x00001000 +#define E1000_RXDADV_SPH 0x8000 +#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ +#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define E1000_RXDADV_ERR_HBO 0x00800000 + +/* RSS Hash results */ +#define E1000_RXDADV_RSSTYPE_NONE 0x00000000 +#define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 +#define E1000_RXDADV_RSSTYPE_IPV4 0x00000002 +#define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 +#define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004 +#define E1000_RXDADV_RSSTYPE_IPV6 0x00000005 +#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 +#define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 +#define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 +#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 + +/* RSS Packet Types as indicated in the receive descriptor */ +#define E1000_RXDADV_PKTTYPE_NONE 0x00000000 +#define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */ +#define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */ +#define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */ +#define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */ +#define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ +#define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ +#define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ +#define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ + +#define E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ +#define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ +#define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ +#define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ +#define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ +#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ + +/* LinkSec results */ +/* Security Processing bit Indication */ +#define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000 +#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 +#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 +#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 +#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 + +#define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000 +#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 +#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 +#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 +#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000 + +/* Transmit Descriptor - Advanced */ +union e1000_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ +#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ +#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ +#define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ +#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ +#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */ +#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */ +#define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */ +#define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ +#define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ +#define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ +#define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ +/* 1st & Last TSO-full iSCSI PDU*/ +#define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800 +#define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ +#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +/* Context descriptors */ +struct e1000_adv_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ +#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ +#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ +#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ +#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ +/* IPSec Encrypt Enable for ESP */ +#define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 +/* Req requires Markers and CRC */ +#define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000 +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ +/* Adv ctxt IPSec SA IDX mask */ +#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF +/* Adv ctxt IPSec ESP len mask */ +#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF + +/* Additional Transmit Descriptor Control definitions */ +#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ +#define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */ +/* Tx Queue Arbitration Priority 0=low, 1=high */ +#define E1000_TXDCTL_PRIORITY 0x08000000 + +/* Additional Receive Descriptor Control definitions */ +#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ +#define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */ + +/* Direct Cache Access (DCA) definitions */ +#define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ +#define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ + +#define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ +#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ + +#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ +#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ +#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */ +#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */ +#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */ + +#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ +#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ +#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ +#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ + +#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ +#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ +#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */ +#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */ + +/* Additional interrupt register bit definitions */ +#define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */ +#define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ +#define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ + +/* ETQF register bit definitions */ +#define E1000_ETQF_FILTER_ENABLE (1 << 26) +#define E1000_ETQF_IMM_INT (1 << 29) +#define E1000_ETQF_1588 (1 << 30) +#define E1000_ETQF_QUEUE_ENABLE (1 << 31) +/* + * ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters + * here!! + * + * Current filters: + * EAPOL 802.1x (0x888e): Filter 0 + */ +#define E1000_ETQF_FILTER_EAPOL 0 + +#define E1000_FTQF_VF_BP 0x00008000 +#define E1000_FTQF_1588_TIME_STAMP 0x08000000 +#define E1000_FTQF_MASK 0xF0000000 +#define E1000_FTQF_MASK_PROTO_BP 0x10000000 +#define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000 +#define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000 +#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 + +#define E1000_NVM_APME_82575 0x0400 +#define MAX_NUM_VFS 7 + +#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */ +#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */ +#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ +#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 +#define E1000_DTXSWC_LLE_SHIFT 16 +#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ + +/* Easy defines for setting default pool, would normally be left a zero */ +#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 +#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) + +/* Other useful VMD_CTL register defines */ +#define E1000_VT_CTL_IGNORE_MAC (1 << 28) +#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) +#define E1000_VT_CTL_VM_REPL_EN (1 << 30) + +/* Per VM Offload register setup */ +#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ +#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ +#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ +#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ +#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ +#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ +#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ +#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ +#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ +#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ + +#define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */ +#define E1000_VMOLR_UPE 0x20000000 /* Unicast promisuous enable */ +#define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */ +#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ +#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ + +#define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */ +#define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */ + +#define E1000_VLVF_ARRAY_SIZE 32 +#define E1000_VLVF_VLANID_MASK 0x00000FFF +#define E1000_VLVF_POOLSEL_SHIFT 12 +#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) +#define E1000_VLVF_LVLAN 0x00100000 +#define E1000_VLVF_VLANID_ENABLE 0x80000000 + +#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ +#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ + +#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ + +#define E1000_IOVCTL 0x05BBC +#define E1000_IOVCTL_REUSE_VFQ 0x00000001 + +#define E1000_RPLOLR_STRVLAN 0x40000000 +#define E1000_RPLOLR_STRCRC 0x80000000 + +#define E1000_TCTL_EXT_COLD 0x000FFC00 +#define E1000_TCTL_EXT_COLD_SHIFT 10 + +#define E1000_DTXCTL_8023LL 0x0004 +#define E1000_DTXCTL_VLAN_ADDED 0x0008 +#define E1000_DTXCTL_OOS_ENABLE 0x0010 +#define E1000_DTXCTL_MDP_EN 0x0020 +#define E1000_DTXCTL_SPOOF_INT 0x0040 + +#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) + +#define ALL_QUEUES 0xFFFF + +/* Rx packet buffer size defines */ +#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F +void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable); +void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf); +void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable); +s32 e1000_init_nvm_params_82575(struct e1000_hw *hw); + +u16 e1000_rxpbs_adjust_82580(u32 data); +s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); +s32 e1000_set_eee_i350(struct e1000_hw *); +s32 e1000_set_eee_i354(struct e1000_hw *); +s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *); +#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define E1000_EMC_INTERNAL_DATA 0x00 +#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20 +#define E1000_EMC_DIODE1_DATA 0x01 +#define E1000_EMC_DIODE1_THERM_LIMIT 0x19 +#define E1000_EMC_DIODE2_DATA 0x23 +#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A +#define E1000_EMC_DIODE3_DATA 0x2A +#define E1000_EMC_DIODE3_THERM_LIMIT 0x30 + +s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw); +s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw); + +/* I2C SDA and SCL timing parameters for standard mode */ +#define E1000_I2C_T_HD_STA 4 +#define E1000_I2C_T_LOW 5 +#define E1000_I2C_T_HIGH 4 +#define E1000_I2C_T_SU_STA 5 +#define E1000_I2C_T_HD_DATA 5 +#define E1000_I2C_T_SU_DATA 1 +#define E1000_I2C_T_RISE 1 +#define E1000_I2C_T_FALL 1 +#define E1000_I2C_T_SU_STO 4 +#define E1000_I2C_T_BUF 5 + +s32 e1000_set_i2c_bb(struct e1000_hw *hw); +s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +void e1000_i2c_bus_clear(struct e1000_hw *hw); +#endif /* _E1000_82575_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c new file mode 100644 index 00000000..6095d3b4 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c @@ -0,0 +1,1159 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +/** + * e1000_init_mac_params - Initialize MAC function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the MAC + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_mac_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->mac.ops.init_params) { + ret_val = hw->mac.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("MAC Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("mac.init_mac_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_nvm_params - Initialize NVM function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the NVM + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_nvm_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->nvm.ops.init_params) { + ret_val = hw->nvm.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("NVM Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("nvm.init_nvm_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_phy_params - Initialize PHY function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the PHY + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_phy_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->phy.ops.init_params) { + ret_val = hw->phy.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("PHY Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("phy.init_phy_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_mbx_params - Initialize mailbox function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the PHY + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_mbx_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->mbx.ops.init_params) { + ret_val = hw->mbx.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("Mailbox Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("mbx.init_mbx_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the mac type of the adapter based on the + * device ID stored in the hw structure. + * MUST BE FIRST FUNCTION CALLED (explicitly or through + * e1000_setup_init_funcs()). + **/ +s32 e1000_set_mac_type(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_set_mac_type"); + + switch (hw->device_id) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + mac->type = e1000_82575; + break; + case E1000_DEV_ID_82576: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + case E1000_DEV_ID_82576_NS: + case E1000_DEV_ID_82576_NS_SERDES: + case E1000_DEV_ID_82576_SERDES_QUAD: + mac->type = e1000_82576; + break; + case E1000_DEV_ID_82580_COPPER: + case E1000_DEV_ID_82580_FIBER: + case E1000_DEV_ID_82580_SERDES: + case E1000_DEV_ID_82580_SGMII: + case E1000_DEV_ID_82580_COPPER_DUAL: + case E1000_DEV_ID_82580_QUAD_FIBER: + case E1000_DEV_ID_DH89XXCC_SGMII: + case E1000_DEV_ID_DH89XXCC_SERDES: + case E1000_DEV_ID_DH89XXCC_BACKPLANE: + case E1000_DEV_ID_DH89XXCC_SFP: + mac->type = e1000_82580; + break; + case E1000_DEV_ID_I350_COPPER: + case E1000_DEV_ID_I350_FIBER: + case E1000_DEV_ID_I350_SERDES: + case E1000_DEV_ID_I350_SGMII: + case E1000_DEV_ID_I350_DA4: + mac->type = e1000_i350; + break; + case E1000_DEV_ID_I210_COPPER_FLASHLESS: + case E1000_DEV_ID_I210_SERDES_FLASHLESS: + case E1000_DEV_ID_I210_COPPER: + case E1000_DEV_ID_I210_COPPER_OEM1: + case E1000_DEV_ID_I210_COPPER_IT: + case E1000_DEV_ID_I210_FIBER: + case E1000_DEV_ID_I210_SERDES: + case E1000_DEV_ID_I210_SGMII: + mac->type = e1000_i210; + break; + case E1000_DEV_ID_I211_COPPER: + mac->type = e1000_i211; + break; + + case E1000_DEV_ID_I354_BACKPLANE_1GBPS: + case E1000_DEV_ID_I354_SGMII: + case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: + mac->type = e1000_i354; + break; + default: + /* Should never have loaded on this device */ + ret_val = -E1000_ERR_MAC_INIT; + break; + } + + return ret_val; +} + +/** + * e1000_setup_init_funcs - Initializes function pointers + * @hw: pointer to the HW structure + * @init_device: true will initialize the rest of the function pointers + * getting the device ready for use. false will only set + * MAC type and the function pointers for the other init + * functions. Passing false will not generate any hardware + * reads or writes. + * + * This function must be called by a driver in order to use the rest + * of the 'shared' code files. Called by drivers only. + **/ +s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) +{ + s32 ret_val; + + /* Can't do much good without knowing the MAC type. */ + ret_val = e1000_set_mac_type(hw); + if (ret_val) { + DEBUGOUT("ERROR: MAC type could not be set properly.\n"); + goto out; + } + + if (!hw->hw_addr) { + DEBUGOUT("ERROR: Registers not mapped\n"); + ret_val = -E1000_ERR_CONFIG; + goto out; + } + + /* + * Init function pointers to generic implementations. We do this first + * allowing a driver module to override it afterward. + */ + e1000_init_mac_ops_generic(hw); + e1000_init_phy_ops_generic(hw); + e1000_init_nvm_ops_generic(hw); + e1000_init_mbx_ops_generic(hw); + + /* + * Set up the init function pointers. These are functions within the + * adapter family file that sets up function pointers for the rest of + * the functions in that family. + */ + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + e1000_init_function_pointers_82575(hw); + break; + case e1000_i210: + case e1000_i211: + e1000_init_function_pointers_i210(hw); + break; + default: + DEBUGOUT("Hardware not supported\n"); + ret_val = -E1000_ERR_CONFIG; + break; + } + + /* + * Initialize the rest of the function pointers. These require some + * register reads/writes in some cases. + */ + if (!(ret_val) && init_device) { + ret_val = e1000_init_mac_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_nvm_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_phy_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_mbx_params(hw); + if (ret_val) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_get_bus_info - Obtain bus information for adapter + * @hw: pointer to the HW structure + * + * This will obtain information about the HW bus for which the + * adapter is attached and stores it in the hw structure. This is a + * function pointer entry point called by drivers. + **/ +s32 e1000_get_bus_info(struct e1000_hw *hw) +{ + if (hw->mac.ops.get_bus_info) + return hw->mac.ops.get_bus_info(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_clear_vfta - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * This clears the VLAN filter table on the adapter. This is a function + * pointer entry point called by drivers. + **/ +void e1000_clear_vfta(struct e1000_hw *hw) +{ + if (hw->mac.ops.clear_vfta) + hw->mac.ops.clear_vfta(hw); +} + +/** + * e1000_write_vfta - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: the 32-bit offset in which to write the value to. + * @value: the 32-bit value to write at location offset. + * + * This writes a 32-bit value to a 32-bit offset in the VLAN filter + * table. This is a function pointer entry point called by drivers. + **/ +void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) +{ + if (hw->mac.ops.write_vfta) + hw->mac.ops.write_vfta(hw, offset, value); +} + +/** + * e1000_update_mc_addr_list - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates the Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count) +{ + if (hw->mac.ops.update_mc_addr_list) + hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, + mc_addr_count); +} + +/** + * e1000_force_mac_fc - Force MAC flow control + * @hw: pointer to the HW structure + * + * Force the MAC's flow control settings. Currently no func pointer exists + * and all implementations are handled in the generic version of this + * function. + **/ +s32 e1000_force_mac_fc(struct e1000_hw *hw) +{ + return e1000_force_mac_fc_generic(hw); +} + +/** + * e1000_check_for_link - Check/Store link connection + * @hw: pointer to the HW structure + * + * This checks the link condition of the adapter and stores the + * results in the hw->mac structure. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_check_for_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.check_for_link) + return hw->mac.ops.check_for_link(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_check_mng_mode - Check management mode + * @hw: pointer to the HW structure + * + * This checks if the adapter has manageability enabled. + * This is a function pointer entry point called by drivers. + **/ +bool e1000_check_mng_mode(struct e1000_hw *hw) +{ + if (hw->mac.ops.check_mng_mode) + return hw->mac.ops.check_mng_mode(hw); + + return false; +} + +/** + * e1000_mng_write_dhcp_info - Writes DHCP info to host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface + * @length: size of the buffer + * + * Writes the DHCP information to the host interface. + **/ +s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) +{ + return e1000_mng_write_dhcp_info_generic(hw, buffer, length); +} + +/** + * e1000_reset_hw - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_reset_hw(struct e1000_hw *hw) +{ + if (hw->mac.ops.reset_hw) + return hw->mac.ops.reset_hw(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_init_hw - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_init_hw(struct e1000_hw *hw) +{ + if (hw->mac.ops.init_hw) + return hw->mac.ops.init_hw(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_setup_link - Configures link and flow control + * @hw: pointer to the HW structure + * + * This configures link and flow control settings for the adapter. This + * is a function pointer entry point called by drivers. While modules can + * also call this, they probably call their own version of this function. + **/ +s32 e1000_setup_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.setup_link) + return hw->mac.ops.setup_link(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_get_speed_and_duplex - Returns current speed and duplex + * @hw: pointer to the HW structure + * @speed: pointer to a 16-bit value to store the speed + * @duplex: pointer to a 16-bit value to store the duplex. + * + * This returns the speed and duplex of the adapter in the two 'out' + * variables passed in. This is a function pointer entry point called + * by drivers. + **/ +s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) +{ + if (hw->mac.ops.get_link_up_info) + return hw->mac.ops.get_link_up_info(hw, speed, duplex); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_setup_led - Configures SW controllable LED + * @hw: pointer to the HW structure + * + * This prepares the SW controllable LED for use and saves the current state + * of the LED so it can be later restored. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_setup_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.setup_led) + return hw->mac.ops.setup_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_cleanup_led - Restores SW controllable LED + * @hw: pointer to the HW structure + * + * This restores the SW controllable LED to the value saved off by + * e1000_setup_led. This is a function pointer entry point called by drivers. + **/ +s32 e1000_cleanup_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.cleanup_led) + return hw->mac.ops.cleanup_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_blink_led - Blink SW controllable LED + * @hw: pointer to the HW structure + * + * This starts the adapter LED blinking. Request the LED to be setup first + * and cleaned up after. This is a function pointer entry point called by + * drivers. + **/ +s32 e1000_blink_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.blink_led) + return hw->mac.ops.blink_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_id_led_init - store LED configurations in SW + * @hw: pointer to the HW structure + * + * Initializes the LED config in SW. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_id_led_init(struct e1000_hw *hw) +{ + if (hw->mac.ops.id_led_init) + return hw->mac.ops.id_led_init(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_led_on - Turn on SW controllable LED + * @hw: pointer to the HW structure + * + * Turns the SW defined LED on. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_led_on(struct e1000_hw *hw) +{ + if (hw->mac.ops.led_on) + return hw->mac.ops.led_on(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_led_off - Turn off SW controllable LED + * @hw: pointer to the HW structure + * + * Turns the SW defined LED off. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_led_off(struct e1000_hw *hw) +{ + if (hw->mac.ops.led_off) + return hw->mac.ops.led_off(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_reset_adaptive - Reset adaptive IFS + * @hw: pointer to the HW structure + * + * Resets the adaptive IFS. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +void e1000_reset_adaptive(struct e1000_hw *hw) +{ + e1000_reset_adaptive_generic(hw); +} + +/** + * e1000_update_adaptive - Update adaptive IFS + * @hw: pointer to the HW structure + * + * Updates adapter IFS. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +void e1000_update_adaptive(struct e1000_hw *hw) +{ + e1000_update_adaptive_generic(hw); +} + +/** + * e1000_disable_pcie_master - Disable PCI-Express master access + * @hw: pointer to the HW structure + * + * Disables PCI-Express master access and verifies there are no pending + * requests. Currently no func pointer exists and all implementations are + * handled in the generic version of this function. + **/ +s32 e1000_disable_pcie_master(struct e1000_hw *hw) +{ + return e1000_disable_pcie_master_generic(hw); +} + +/** + * e1000_config_collision_dist - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +void e1000_config_collision_dist(struct e1000_hw *hw) +{ + if (hw->mac.ops.config_collision_dist) + hw->mac.ops.config_collision_dist(hw); +} + +/** + * e1000_rar_set - Sets a receive address register + * @hw: pointer to the HW structure + * @addr: address to set the RAR to + * @index: the RAR to set + * + * Sets a Receive Address Register (RAR) to the specified address. + **/ +void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) +{ + if (hw->mac.ops.rar_set) + hw->mac.ops.rar_set(hw, addr, index); +} + +/** + * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state + * @hw: pointer to the HW structure + * + * Ensures that the MDI/MDIX SW state is valid. + **/ +s32 e1000_validate_mdi_setting(struct e1000_hw *hw) +{ + if (hw->mac.ops.validate_mdi_setting) + return hw->mac.ops.validate_mdi_setting(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_hash_mc_addr - Determines address location in multicast table + * @hw: pointer to the HW structure + * @mc_addr: Multicast address to hash. + * + * This hashes an address to determine its location in the multicast + * table. Currently no func pointer exists and all implementations + * are handled in the generic version of this function. + **/ +u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) +{ + return e1000_hash_mc_addr_generic(hw, mc_addr); +} + +/** + * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX + * @hw: pointer to the HW structure + * + * Enables packet filtering on transmit packets if manageability is enabled + * and host interface is enabled. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) +{ + return e1000_enable_tx_pkt_filtering_generic(hw); +} + +/** + * e1000_mng_host_if_write - Writes to the manageability host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface buffer + * @length: size of the buffer + * @offset: location in the buffer to write to + * @sum: sum of the data (not checksum) + * + * This function writes the buffer content at the offset given on the host if. + * It also does alignment considerations to do the writes in most efficient + * way. Also fills up the sum of the buffer in *buffer parameter. + **/ +s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, + u16 offset, u8 *sum) +{ + return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum); +} + +/** + * e1000_mng_write_cmd_header - Writes manageability command header + * @hw: pointer to the HW structure + * @hdr: pointer to the host interface command header + * + * Writes the command header after does the checksum calculation. + **/ +s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr) +{ + return e1000_mng_write_cmd_header_generic(hw, hdr); +} + +/** + * e1000_mng_enable_host_if - Checks host interface is enabled + * @hw: pointer to the HW structure + * + * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND + * + * This function checks whether the HOST IF is enabled for command operation + * and also checks whether the previous command is completed. It busy waits + * in case of previous command is not completed. + **/ +s32 e1000_mng_enable_host_if(struct e1000_hw *hw) +{ + return e1000_mng_enable_host_if_generic(hw); +} + +/** + * e1000_check_reset_block - Verifies PHY can be reset + * @hw: pointer to the HW structure + * + * Checks if the PHY is in a state that can be reset or if manageability + * has it tied up. This is a function pointer entry point called by drivers. + **/ +s32 e1000_check_reset_block(struct e1000_hw *hw) +{ + if (hw->phy.ops.check_reset_block) + return hw->phy.ops.check_reset_block(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg - Reads PHY register + * @hw: pointer to the HW structure + * @offset: the register to read + * @data: the buffer to store the 16-bit read. + * + * Reads the PHY register and returns the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) +{ + if (hw->phy.ops.read_reg) + return hw->phy.ops.read_reg(hw, offset, data); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg - Writes PHY register + * @hw: pointer to the HW structure + * @offset: the register to write + * @data: the value to write. + * + * Writes the PHY register at offset with the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) +{ + if (hw->phy.ops.write_reg) + return hw->phy.ops.write_reg(hw, offset, data); + + return E1000_SUCCESS; +} + +/** + * e1000_release_phy - Generic release PHY + * @hw: pointer to the HW structure + * + * Return if silicon family does not require a semaphore when accessing the + * PHY. + **/ +void e1000_release_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.release) + hw->phy.ops.release(hw); +} + +/** + * e1000_acquire_phy - Generic acquire PHY + * @hw: pointer to the HW structure + * + * Return success if silicon family does not require a semaphore when + * accessing the PHY. + **/ +s32 e1000_acquire_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.acquire) + return hw->phy.ops.acquire(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_kmrn_reg - Reads register using Kumeran interface + * @hw: pointer to the HW structure + * @offset: the register to read + * @data: the location to store the 16-bit value read. + * + * Reads a register out of the Kumeran interface. Currently no func pointer + * exists and all implementations are handled in the generic version of + * this function. + **/ +s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return e1000_read_kmrn_reg_generic(hw, offset, data); +} + +/** + * e1000_write_kmrn_reg - Writes register using Kumeran interface + * @hw: pointer to the HW structure + * @offset: the register to write + * @data: the value to write. + * + * Writes a register to the Kumeran interface. Currently no func pointer + * exists and all implementations are handled in the generic version of + * this function. + **/ +s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) +{ + return e1000_write_kmrn_reg_generic(hw, offset, data); +} + +/** + * e1000_get_cable_length - Retrieves cable length estimation + * @hw: pointer to the HW structure + * + * This function estimates the cable length and stores them in + * hw->phy.min_length and hw->phy.max_length. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_get_cable_length(struct e1000_hw *hw) +{ + if (hw->phy.ops.get_cable_length) + return hw->phy.ops.get_cable_length(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_info - Retrieves PHY information from registers + * @hw: pointer to the HW structure + * + * This function gets some information from various PHY registers and + * populates hw->phy values with it. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_get_phy_info(struct e1000_hw *hw) +{ + if (hw->phy.ops.get_info) + return hw->phy.ops.get_info(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_hw_reset - Hard PHY reset + * @hw: pointer to the HW structure + * + * Performs a hard PHY reset. This is a function pointer entry point called + * by drivers. + **/ +s32 e1000_phy_hw_reset(struct e1000_hw *hw) +{ + if (hw->phy.ops.reset) + return hw->phy.ops.reset(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_commit - Soft PHY reset + * @hw: pointer to the HW structure + * + * Performs a soft PHY reset on those that apply. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_phy_commit(struct e1000_hw *hw) +{ + if (hw->phy.ops.commit) + return hw->phy.ops.commit(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_set_d0_lplu_state - Sets low power link up state for D0 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D0 + * and SmartSpeed is disabled when active is true, else clear lplu for D0 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. This is a function pointer entry point called by drivers. + **/ +s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) +{ + if (hw->phy.ops.set_d0_lplu_state) + return hw->phy.ops.set_d0_lplu_state(hw, active); + + return E1000_SUCCESS; +} + +/** + * e1000_set_d3_lplu_state - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. This is a function pointer entry point called by drivers. + **/ +s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) +{ + if (hw->phy.ops.set_d3_lplu_state) + return hw->phy.ops.set_d3_lplu_state(hw, active); + + return E1000_SUCCESS; +} + +/** + * e1000_read_mac_addr - Reads MAC address + * @hw: pointer to the HW structure + * + * Reads the MAC address out of the adapter and stores it in the HW structure. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_mac_addr(struct e1000_hw *hw) +{ + if (hw->mac.ops.read_mac_addr) + return hw->mac.ops.read_mac_addr(hw); + + return e1000_read_mac_addr_generic(hw); +} + +/** + * e1000_read_pba_string - Read device part number string + * @hw: pointer to the HW structure + * @pba_num: pointer to device part number + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number from the EEPROM and stores + * the value in pba_num. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) +{ + return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); +} + +/** + * e1000_read_pba_length - Read device part number string length + * @hw: pointer to the HW structure + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number length from the EEPROM and + * stores the value in pba_num. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) +{ + return e1000_read_pba_length_generic(hw, pba_num_size); +} + +/** + * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum + * @hw: pointer to the HW structure + * + * Validates the NVM checksum is correct. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) +{ + if (hw->nvm.ops.validate) + return hw->nvm.ops.validate(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum + * @hw: pointer to the HW structure + * + * Updates the NVM checksum. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +s32 e1000_update_nvm_checksum(struct e1000_hw *hw) +{ + if (hw->nvm.ops.update) + return hw->nvm.ops.update(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_reload_nvm - Reloads EEPROM + * @hw: pointer to the HW structure + * + * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the + * extended control register. + **/ +void e1000_reload_nvm(struct e1000_hw *hw) +{ + if (hw->nvm.ops.reload) + hw->nvm.ops.reload(hw); +} + +/** + * e1000_read_nvm - Reads NVM (EEPROM) + * @hw: pointer to the HW structure + * @offset: the word offset to read + * @words: number of 16-bit words to read + * @data: pointer to the properly sized buffer for the data. + * + * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + if (hw->nvm.ops.read) + return hw->nvm.ops.read(hw, offset, words, data); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_write_nvm - Writes to NVM (EEPROM) + * @hw: pointer to the HW structure + * @offset: the word offset to read + * @words: number of 16-bit words to write + * @data: pointer to the properly sized buffer for the data. + * + * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + if (hw->nvm.ops.write) + return hw->nvm.ops.write(hw, offset, words, data); + + return E1000_SUCCESS; +} + +/** + * e1000_write_8bit_ctrl_reg - Writes 8bit Control register + * @hw: pointer to the HW structure + * @reg: 32bit register offset + * @offset: the register to write + * @data: the value to write. + * + * Writes the PHY register at offset with the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, + u8 data) +{ + return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); +} + +/** + * e1000_power_up_phy - Restores link in case of PHY power down + * @hw: pointer to the HW structure + * + * The phy may be powered down to save power, to turn off link when the + * driver is unloaded, or wake on lan is not enabled (among others). + **/ +void e1000_power_up_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.power_up) + hw->phy.ops.power_up(hw); + + e1000_setup_link(hw); +} + +/** + * e1000_power_down_phy - Power down PHY + * @hw: pointer to the HW structure + * + * The phy may be powered down to save power, to turn off link when the + * driver is unloaded, or wake on lan is not enabled (among others). + **/ +void e1000_power_down_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.power_down) + hw->phy.ops.power_down(hw); +} + +/** + * e1000_power_up_fiber_serdes_link - Power up serdes link + * @hw: pointer to the HW structure + * + * Power on the optics and PCS. + **/ +void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.power_up_serdes) + hw->mac.ops.power_up_serdes(hw); +} + +/** + * e1000_shutdown_fiber_serdes_link - Remove link during power down + * @hw: pointer to the HW structure + * + * Shutdown the optics and PCS on driver unload. + **/ +void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.shutdown_serdes) + hw->mac.ops.shutdown_serdes(hw); +} + +/** + * e1000_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw) +{ + if (hw->mac.ops.get_thermal_sensor_data) + return hw->mac.ops.get_thermal_sensor_data(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Sets the thermal sensor thresholds according to the NVM map + **/ +s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw) +{ + if (hw->mac.ops.init_thermal_sensor_thresh) + return hw->mac.ops.init_thermal_sensor_thresh(hw); + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h new file mode 100644 index 00000000..b21294ec --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h @@ -0,0 +1,157 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_API_H_ +#define _E1000_API_H_ + +#include "e1000_hw.h" + +extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); +extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw); +extern void e1000_init_function_pointers_vf(struct e1000_hw *hw); +extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw); +extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw); +extern void e1000_init_function_pointers_i210(struct e1000_hw *hw); + +s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr); +s32 e1000_set_mac_type(struct e1000_hw *hw); +s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); +s32 e1000_init_mac_params(struct e1000_hw *hw); +s32 e1000_init_nvm_params(struct e1000_hw *hw); +s32 e1000_init_phy_params(struct e1000_hw *hw); +s32 e1000_init_mbx_params(struct e1000_hw *hw); +s32 e1000_get_bus_info(struct e1000_hw *hw); +void e1000_clear_vfta(struct e1000_hw *hw); +void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); +s32 e1000_force_mac_fc(struct e1000_hw *hw); +s32 e1000_check_for_link(struct e1000_hw *hw); +s32 e1000_reset_hw(struct e1000_hw *hw); +s32 e1000_init_hw(struct e1000_hw *hw); +s32 e1000_setup_link(struct e1000_hw *hw); +s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex); +s32 e1000_disable_pcie_master(struct e1000_hw *hw); +void e1000_config_collision_dist(struct e1000_hw *hw); +void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); +u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); +void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count); +s32 e1000_setup_led(struct e1000_hw *hw); +s32 e1000_cleanup_led(struct e1000_hw *hw); +s32 e1000_check_reset_block(struct e1000_hw *hw); +s32 e1000_blink_led(struct e1000_hw *hw); +s32 e1000_led_on(struct e1000_hw *hw); +s32 e1000_led_off(struct e1000_hw *hw); +s32 e1000_id_led_init(struct e1000_hw *hw); +void e1000_reset_adaptive(struct e1000_hw *hw); +void e1000_update_adaptive(struct e1000_hw *hw); +s32 e1000_get_cable_length(struct e1000_hw *hw); +s32 e1000_validate_mdi_setting(struct e1000_hw *hw); +s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, + u8 data); +s32 e1000_get_phy_info(struct e1000_hw *hw); +void e1000_release_phy(struct e1000_hw *hw); +s32 e1000_acquire_phy(struct e1000_hw *hw); +s32 e1000_phy_hw_reset(struct e1000_hw *hw); +s32 e1000_phy_commit(struct e1000_hw *hw); +void e1000_power_up_phy(struct e1000_hw *hw); +void e1000_power_down_phy(struct e1000_hw *hw); +s32 e1000_read_mac_addr(struct e1000_hw *hw); +s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size); +s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size); +void e1000_reload_nvm(struct e1000_hw *hw); +s32 e1000_update_nvm_checksum(struct e1000_hw *hw); +s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); +s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); +s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); +bool e1000_check_mng_mode(struct e1000_hw *hw); +bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); +s32 e1000_mng_enable_host_if(struct e1000_hw *hw); +s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, + u16 offset, u8 *sum); +s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr); +s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); +s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw); +s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw); + + + +/* + * TBI_ACCEPT macro definition: + * + * This macro requires: + * adapter = a pointer to struct e1000_hw + * status = the 8 bit status field of the Rx descriptor with EOP set + * error = the 8 bit error field of the Rx descriptor with EOP set + * length = the sum of all the length fields of the Rx descriptors that + * make up the current frame + * last_byte = the last byte of the frame DMAed by the hardware + * max_frame_length = the maximum frame length we want to accept. + * min_frame_length = the minimum frame length we want to accept. + * + * This macro is a conditional that should be used in the interrupt + * handler's Rx processing routine when RxErrors have been detected. + * + * Typical use: + * ... + * if (TBI_ACCEPT) { + * accept_frame = true; + * e1000_tbi_adjust_stats(adapter, MacAddress); + * frame_length--; + * } else { + * accept_frame = false; + * } + * ... + */ + +/* The carrier extension symbol, as received by the NIC. */ +#define CARRIER_EXTENSION 0x0F + +#define TBI_ACCEPT(a, status, errors, length, last_byte, \ + min_frame_size, max_frame_size) \ + (e1000_tbi_sbp_enabled_82543(a) && \ + (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ + ((last_byte) == CARRIER_EXTENSION) && \ + (((status) & E1000_RXD_STAT_VP) ? \ + (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \ + ((length) <= (max_frame_size + 1))) : \ + (((length) > min_frame_size) && \ + ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1))))) + +#ifndef E1000_MAX +#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef E1000_DIVIDE_ROUND_UP +#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */ +#endif +#endif /* _E1000_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h new file mode 100644 index 00000000..63b228c5 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h @@ -0,0 +1,1380 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_DEFINES_H_ +#define _E1000_DEFINES_H_ + +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 + +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define E1000_WUC_APME 0x00000001 /* APM Enable */ +#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ +#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ +#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ +#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ + +/* Wake Up Filter Control */ +#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ +#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ +#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ + +/* Wake Up Status */ +#define E1000_WUS_LNKC E1000_WUFC_LNKC +#define E1000_WUS_MAG E1000_WUFC_MAG +#define E1000_WUS_EX E1000_WUFC_EX +#define E1000_WUS_MC E1000_WUFC_MC +#define E1000_WUS_BC E1000_WUFC_BC + +/* Extended Device Control */ +#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */ +#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */ +#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */ +#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ +#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */ +#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ +/* Physical Func Reset Done Indication */ +#define E1000_CTRL_EXT_PFRSTD 0x00004000 +#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ +#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ +#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */ +#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 +/* Offset of the link mode field in Ctrl Ext register */ +#define E1000_CTRL_EXT_LINK_MODE_OFFSET 22 +#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 +#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 +#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IRCA 0x00000001 +#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ +#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ +#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_I2CCMD_REG_ADDR_SHIFT 16 +#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 +#define E1000_I2CCMD_OPCODE_READ 0x08000000 +#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 +#define E1000_I2CCMD_READY 0x20000000 +#define E1000_I2CCMD_ERROR 0x80000000 +#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) +#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) +#define E1000_MAX_SGMII_PHY_REG_ADDR 255 +#define E1000_I2CCMD_PHY_TIMEOUT 200 +#define E1000_IVAR_VALID 0x80 +#define E1000_GPIE_NSICR 0x00000001 +#define E1000_GPIE_MSIX_MODE 0x00000010 +#define E1000_GPIE_EIAME 0x40000000 +#define E1000_GPIE_PBA 0x80000000 + +/* Receive Descriptor bit definitions */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ +#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ +#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ +#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ +#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ +#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ +#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ +#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ +#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ +#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ +#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ +#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ +#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ +#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ + +#define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */ +#define E1000_RXDEXT_STATERR_LB 0x00040000 +#define E1000_RXDEXT_STATERR_CE 0x01000000 +#define E1000_RXDEXT_STATERR_SE 0x02000000 +#define E1000_RXDEXT_STATERR_SEQ 0x04000000 +#define E1000_RXDEXT_STATERR_CXE 0x10000000 +#define E1000_RXDEXT_STATERR_TCPE 0x20000000 +#define E1000_RXDEXT_STATERR_IPE 0x40000000 +#define E1000_RXDEXT_STATERR_RXE 0x80000000 + +/* mask to determine if packets should be dropped due to frame errors */ +#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ + E1000_RXD_ERR_CE | \ + E1000_RXD_ERR_SE | \ + E1000_RXD_ERR_SEQ | \ + E1000_RXD_ERR_CXE | \ + E1000_RXD_ERR_RXE) + +/* Same mask, but for extended and packet split descriptors */ +#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ + E1000_RXDEXT_STATERR_CE | \ + E1000_RXDEXT_STATERR_SE | \ + E1000_RXDEXT_STATERR_SEQ | \ + E1000_RXDEXT_STATERR_CXE | \ + E1000_RXDEXT_STATERR_RXE) + +#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 +#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 +#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 +#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 +#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 +#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 + +#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 + +/* Management Control */ +#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ +#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ +#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ +/* Enable MAC address filtering */ +#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 +/* Enable MNG packets to host memory */ +#define E1000_MANC_EN_MNG2HOST 0x00200000 + +#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ +#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ +#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ +#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ +#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ + +/* Use byte values for the following shift parameters + * Usage: + * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & + * E1000_PSRCTL_BSIZE0_MASK) | + * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & + * E1000_PSRCTL_BSIZE1_MASK) | + * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & + * E1000_PSRCTL_BSIZE2_MASK) | + * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; + * E1000_PSRCTL_BSIZE3_MASK)) + * where value0 = [128..16256], default=256 + * value1 = [1024..64512], default=4096 + * value2 = [0..64512], default=4096 + * value3 = [0..64512], default=0 + */ + +#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F +#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 +#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 +#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 + +#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ +#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ +#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ +#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ + +/* SWFW_SYNC Definitions */ +#define E1000_SWFW_EEP_SM 0x01 +#define E1000_SWFW_PHY0_SM 0x02 +#define E1000_SWFW_PHY1_SM 0x04 +#define E1000_SWFW_CSR_SM 0x08 +#define E1000_SWFW_PHY2_SM 0x20 +#define E1000_SWFW_PHY3_SM 0x40 +#define E1000_SWFW_SW_MNG_SM 0x400 + +/* Device Control */ +#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ +#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ +#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */ +#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ +#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ +#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ +#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ +#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ +#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ +#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ +#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ +#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ +#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ +#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ +#define E1000_CTRL_RST 0x04000000 /* Global reset */ +#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ +#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ + + +#define E1000_CONNSW_ENRGSRC 0x4 +#define E1000_CONNSW_PHYSD 0x400 +#define E1000_CONNSW_PHY_PDN 0x800 +#define E1000_CONNSW_SERDESD 0x200 +#define E1000_CONNSW_AUTOSENSE_CONF 0x2 +#define E1000_CONNSW_AUTOSENSE_EN 0x1 +#define E1000_PCS_CFG_PCS_EN 8 +#define E1000_PCS_LCTL_FLV_LINK_UP 1 +#define E1000_PCS_LCTL_FSV_10 0 +#define E1000_PCS_LCTL_FSV_100 2 +#define E1000_PCS_LCTL_FSV_1000 4 +#define E1000_PCS_LCTL_FDV_FULL 8 +#define E1000_PCS_LCTL_FSD 0x10 +#define E1000_PCS_LCTL_FORCE_LINK 0x20 +#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 +#define E1000_PCS_LCTL_AN_ENABLE 0x10000 +#define E1000_PCS_LCTL_AN_RESTART 0x20000 +#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 +#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 + +#define E1000_PCS_LSTS_LINK_OK 1 +#define E1000_PCS_LSTS_SPEED_100 2 +#define E1000_PCS_LSTS_SPEED_1000 4 +#define E1000_PCS_LSTS_DUPLEX_FULL 8 +#define E1000_PCS_LSTS_SYNK_OK 0x10 +#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 + +/* Device Status */ +#define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */ +#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define E1000_STATUS_FUNC_SHIFT 2 +#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define E1000_STATUS_SPEED_MASK 0x000000C0 +#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ +#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */ +#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ +#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */ +#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ +#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + + +#define ADVERTISE_10_HALF 0x0001 +#define ADVERTISE_10_FULL 0x0002 +#define ADVERTISE_100_HALF 0x0004 +#define ADVERTISE_100_FULL 0x0008 +#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ +#define ADVERTISE_1000_FULL 0x0020 + +/* 1000/H is not supported, nor spec-compliant. */ +#define E1000_ALL_SPEED_DUPLEX ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL | ADVERTISE_1000_FULL) +#define E1000_ALL_NOT_GIG ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL) +#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) +#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) +#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) + +#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX + +/* LED Control */ +#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F +#define E1000_LEDCTL_LED0_MODE_SHIFT 0 +#define E1000_LEDCTL_LED0_IVRT 0x00000040 +#define E1000_LEDCTL_LED0_BLINK 0x00000080 + +#define E1000_LEDCTL_MODE_LED_ON 0xE +#define E1000_LEDCTL_MODE_LED_OFF 0xF + +/* Transmit Descriptor bit definitions */ +#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ +#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ +#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ +#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ +#define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ +#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ +#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ +#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ +#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ +#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ +#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ +#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ +#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ + +/* Transmit Control */ +#define E1000_TCTL_EN 0x00000002 /* enable Tx */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ + +/* Transmit Arbitration Count */ +#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ + +/* SerDes Control */ +#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 +#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410 + +/* Receive Checksum Control */ +#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ +#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ +#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ +#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ +#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ + +/* Header split receive */ +#define E1000_RFCTL_NFSW_DIS 0x00000040 +#define E1000_RFCTL_NFSR_DIS 0x00000080 +#define E1000_RFCTL_ACK_DIS 0x00001000 +#define E1000_RFCTL_EXTEN 0x00008000 +#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 +#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 +#define E1000_RFCTL_LEF 0x00040000 + +/* Collision related configuration parameters */ +#define E1000_COLLISION_THRESHOLD 15 +#define E1000_CT_SHIFT 4 +#define E1000_COLLISION_DISTANCE 63 +#define E1000_COLD_SHIFT 12 + +/* Default values for the transmit IPG register */ +#define DEFAULT_82543_TIPG_IPGT_FIBER 9 +#define DEFAULT_82543_TIPG_IPGT_COPPER 8 + +#define E1000_TIPG_IPGT_MASK 0x000003FF + +#define DEFAULT_82543_TIPG_IPGR1 8 +#define E1000_TIPG_IPGR1_SHIFT 10 + +#define DEFAULT_82543_TIPG_IPGR2 6 +#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 +#define E1000_TIPG_IPGR2_SHIFT 20 + +/* Ethertype field values */ +#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ + +#define ETHERNET_FCS_SIZE 4 +#define MAX_JUMBO_FRAME_SIZE 0x3F00 + +/* Extended Configuration Control and Size */ +#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 +#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 +#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 +#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 +#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 +#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 +#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 +#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 +#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 + +#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 +#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 +#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 +#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 + +#define E1000_KABGTXD_BGSQLBIAS 0x00050000 + +/* PBA constants */ +#define E1000_PBA_8K 0x0008 /* 8KB */ +#define E1000_PBA_10K 0x000A /* 10KB */ +#define E1000_PBA_12K 0x000C /* 12KB */ +#define E1000_PBA_14K 0x000E /* 14KB */ +#define E1000_PBA_16K 0x0010 /* 16KB */ +#define E1000_PBA_18K 0x0012 +#define E1000_PBA_20K 0x0014 +#define E1000_PBA_22K 0x0016 +#define E1000_PBA_24K 0x0018 +#define E1000_PBA_26K 0x001A +#define E1000_PBA_30K 0x001E +#define E1000_PBA_32K 0x0020 +#define E1000_PBA_34K 0x0022 +#define E1000_PBA_35K 0x0023 +#define E1000_PBA_38K 0x0026 +#define E1000_PBA_40K 0x0028 +#define E1000_PBA_48K 0x0030 /* 48KB */ +#define E1000_PBA_64K 0x0040 /* 64KB */ + +#define E1000_PBA_RXA_MASK 0xFFFF + +#define E1000_PBS_16K E1000_PBA_16K + +#define IFS_MAX 80 +#define IFS_MIN 40 +#define IFS_RATIO 4 +#define IFS_STEP 10 +#define MIN_NUM_XMITS 1000 + +/* SW Semaphore Register */ +#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ +#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ + +#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ + +/* Interrupt Cause Read */ +#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ +#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ +#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ +#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ +#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* Rx overrun */ +#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_VMMB 0x00000100 /* VM MB event */ +#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ +#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ +#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ +#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ +#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ +#define E1000_ICR_TXD_LOW 0x00008000 +#define E1000_ICR_MNG 0x00040000 /* Manageability event */ +#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ +#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ +/* If this bit asserted, the driver should claim the interrupt */ +#define E1000_ICR_INT_ASSERTED 0x80000000 +#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ +#define E1000_ICR_FER 0x00400000 /* Fatal Error */ + +#define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/ +#define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */ + + +/* Extended Interrupt Cause Read */ +#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ +#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ +#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ +#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ +#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ +#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ +#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ +#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ +#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ +#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ +/* TCP Timer */ +#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ +#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ +#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ +#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ + +/* This defines the bits that are set in the Interrupt Mask + * Set/Read Register. Each bit is documented below: + * o RXT0 = Receiver Timer Interrupt (ring 0) + * o TXDW = Transmit Descriptor Written Back + * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) + * o RXSEQ = Receive Sequence Error + * o LSC = Link Status Change + */ +#define IMS_ENABLE_MASK ( \ + E1000_IMS_RXT0 | \ + E1000_IMS_TXDW | \ + E1000_IMS_RXDMT0 | \ + E1000_IMS_RXSEQ | \ + E1000_IMS_LSC) + +/* Interrupt Mask Set */ +#define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */ +#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ +#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ +#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */ +#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ +#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ +#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ +#define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */ + +#define E1000_IMS_THS E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/ +#define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */ +/* Extended Interrupt Mask Set */ +#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ +#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ +#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ +#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ +#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ +#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ +#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ +#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ +#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ +#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ + +/* Interrupt Cause Set */ +#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ +#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ + +/* Extended Interrupt Cause Set */ +#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ +#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ +#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ +#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ +#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ +#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ +#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ +#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ +#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ +#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ + +#define E1000_EITR_ITR_INT_MASK 0x0000FFFF +/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ +#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ +#define E1000_EITR_INTERVAL 0x00007FFC + +/* Transmit Descriptor Control */ +#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ +#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ +#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ +#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ +#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ +#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ +/* Enable the counting of descriptors still to be processed. */ +#define E1000_TXDCTL_COUNT_DESC 0x00400000 + +/* Flow Control Constants */ +#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 +#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 +#define FLOW_CONTROL_TYPE 0x8808 + +/* 802.1q VLAN Packet Size */ +#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ +#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ + +/* Receive Address + * Number of high/low register pairs in the RAR. The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * Technically, we have 16 spots. However, we reserve one of these spots + * (RAR[15]) for our directed address used by controllers with + * manageability enabled, allowing us room for 15 multicast addresses. + */ +#define E1000_RAR_ENTRIES 15 +#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define E1000_RAL_MAC_ADDR_LEN 4 +#define E1000_RAH_MAC_ADDR_LEN 2 +#define E1000_RAH_QUEUE_MASK_82575 0x000C0000 +#define E1000_RAH_POOL_1 0x00040000 + +/* Error Codes */ +#define E1000_SUCCESS 0 +#define E1000_ERR_NVM 1 +#define E1000_ERR_PHY 2 +#define E1000_ERR_CONFIG 3 +#define E1000_ERR_PARAM 4 +#define E1000_ERR_MAC_INIT 5 +#define E1000_ERR_PHY_TYPE 6 +#define E1000_ERR_RESET 9 +#define E1000_ERR_MASTER_REQUESTS_PENDING 10 +#define E1000_ERR_HOST_INTERFACE_COMMAND 11 +#define E1000_BLK_PHY_RESET 12 +#define E1000_ERR_SWFW_SYNC 13 +#define E1000_NOT_IMPLEMENTED 14 +#define E1000_ERR_MBX 15 +#define E1000_ERR_INVALID_ARGUMENT 16 +#define E1000_ERR_NO_SPACE 17 +#define E1000_ERR_NVM_PBA_SECTION 18 +#define E1000_ERR_I2C 19 +#define E1000_ERR_INVM_VALUE_NOT_FOUND 20 + +/* Loop limit on how long we wait for auto-negotiation to complete */ +#define FIBER_LINK_UP_LIMIT 50 +#define COPPER_LINK_UP_LIMIT 10 +#define PHY_AUTO_NEG_LIMIT 45 +#define PHY_FORCE_LIMIT 20 +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define MASTER_DISABLE_TIMEOUT 800 +/* Number of milliseconds we wait for PHY configuration done after MAC reset */ +#define PHY_CFG_TIMEOUT 100 +/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ +#define MDIO_OWNERSHIP_TIMEOUT 10 +/* Number of milliseconds for NVM auto read done after MAC reset. */ +#define AUTO_READ_DONE_TIMEOUT 10 + +/* Flow Control */ +#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ +#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ +#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ + +/* Transmit Configuration Word */ +#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ +#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ +#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ +#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ +#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ + +/* Receive Configuration Word */ +#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ +#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ +#define E1000_RXCW_C 0x20000000 /* Receive config */ +#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ + +#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ + +#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 +#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ +#define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ + +#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF +#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 +#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 +#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 +#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 +#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 + +#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 +#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 +#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 +#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 +#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 +#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 +#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 +#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 + +#define E1000_TIMINCA_16NS_SHIFT 24 +#define E1000_TIMINCA_INCPERIOD_SHIFT 24 +#define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF + +#define E1000_TSICR_TXTS 0x00000002 +#define E1000_TSIM_TXTS 0x00000002 +/* TUPLE Filtering Configuration */ +#define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */ +#define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */ +#define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */ +/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */ +#define E1000_TTQF_PROTOCOL_TCP 0x0 +/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ +#define E1000_TTQF_PROTOCOL_UDP 0x1 +/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ +#define E1000_TTQF_PROTOCOL_SCTP 0x2 +#define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */ +#define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shfit */ +#define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */ +#define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */ +#define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */ +#define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ +#define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ +#define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ + +#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ +#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ +#define E1000_MDICNFG_PHY_MASK 0x03E00000 +#define E1000_MDICNFG_PHY_SHIFT 21 + +#define E1000_MEDIA_PORT_COPPER 1 +#define E1000_MEDIA_PORT_OTHER 2 +#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 +#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 +#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ +#define E1000_M88E1112_MAC_CTRL_1 0x10 +#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ +#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 +#define E1000_M88E1112_PAGE_ADDR 0x16 +#define E1000_M88E1112_STATUS 0x01 + +#define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */ +#define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */ +#define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */ +#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ +#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */ + +/* I350 EEE defines */ +#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */ +#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */ +#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */ +#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */ +#define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */ +/* EEE status */ +#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ +#define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */ +#define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */ +#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ +#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ +#define E1000_M88E1543_EEE_CTRL_1 0x0 +#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ +#define E1000_EEE_ADV_DEV_I354 7 +#define E1000_EEE_ADV_ADDR_I354 60 +#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ +#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ +#define E1000_PCS_STATUS_DEV_I354 3 +#define E1000_PCS_STATUS_ADDR_I354 1 +#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 +#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 +#define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */ +#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ +#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ +/* PCI Express Control */ +#define E1000_GCR_RXD_NO_SNOOP 0x00000001 +#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 +#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 +#define E1000_GCR_TXD_NO_SNOOP 0x00000008 +#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 +#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 +#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 +#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define E1000_GCR_CAP_VER2 0x00040000 + +#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ + E1000_GCR_RXDSCW_NO_SNOOP | \ + E1000_GCR_RXDSCR_NO_SNOOP | \ + E1000_GCR_TXD_NO_SNOOP | \ + E1000_GCR_TXDSCW_NO_SNOOP | \ + E1000_GCR_TXDSCR_NO_SNOOP) + +#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ + +/* mPHY address control and data registers */ +#define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */ +#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 +#define E1000_MPHY_DATA 0x0E10 /* Data Register */ + +/* AFE CSR Offset for PCS CLK */ +#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 +/* Override for near end digital loopback. */ +#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 + +/* PHY Control Register */ +#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ +#define MII_CR_SPEED_1000 0x0040 +#define MII_CR_SPEED_100 0x2000 +#define MII_CR_SPEED_10 0x0000 + +/* PHY Status Register */ +#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ +#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ +#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ +#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ +#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ +#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ +#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ +#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ +#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ +#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ +#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ + +/* Autoneg Advertisement Register */ +#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ +#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ +#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ +#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ +#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ +#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ +#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ +#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ +#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ +#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Link Partner Ability Register (Base Page) */ +#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ +#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */ +#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */ +#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */ +#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ +#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ +#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ +#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */ +#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */ +#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */ +#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Autoneg Expansion Register */ +#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ +#define NWAY_ER_PAGE_RXD 0x0002 /* LP 10T Half Dplx Capable */ +#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP 10T Full Dplx Capable */ +#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP 100TX Half Dplx Capable */ +#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP 100TX Full Dplx Capable */ + +/* 1000BASE-T Control Register */ +#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ +#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ +#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ +/* 1=Repeater/switch device port 0=DTE device */ +#define CR_1000T_REPEATER_DTE 0x0400 +/* 1=Configure PHY as Master 0=Configure PHY as Slave */ +#define CR_1000T_MS_VALUE 0x0800 +/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ +#define CR_1000T_MS_ENABLE 0x1000 +#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ +#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ +#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ +#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ +#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ + +/* 1000BASE-T Status Register */ +#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */ +#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */ +#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ +#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ +#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ +#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ +#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */ +#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ + +#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CONTROL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Register */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ +#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ +#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ +#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ + +#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ + +/* NVM Control */ +#define E1000_EECD_SK 0x00000001 /* NVM Clock */ +#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ +#define E1000_EECD_DI 0x00000004 /* NVM Data In */ +#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ +#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ +#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ +#define E1000_EECD_PRES 0x00000100 /* NVM Present */ +#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ +#define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */ +#define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */ +#define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */ +#define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */ +/* NVM Addressing bits based on type 0=small, 1=large */ +#define E1000_EECD_ADDR_BITS 0x00000400 +#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ +#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ +#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ +#define E1000_EECD_SIZE_EX_SHIFT 11 +#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ +#define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */ +#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ +#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) +#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ +#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */ +#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ +#define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */ +#define E1000_FLUDONE_ATTEMPTS 20000 +#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ +#define E1000_I210_FIFO_SEL_RX 0x00 +#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) +#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) +#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 +#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 + +#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ +/* Secure FLASH mode requires removing MSb */ +#define E1000_I210_FW_PTR_MASK 0x7FFF +/* Firmware code revision field word offset*/ +#define E1000_I210_FW_VER_OFFSET 328 + +#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */ +#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ +#define E1000_NVM_RW_REG_START 1 /* Start operation */ +#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ +#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ +#define E1000_FLASH_UPDATES 2000 + +/* NVM Word Offsets */ +#define NVM_COMPAT 0x0003 +#define NVM_ID_LED_SETTINGS 0x0004 +#define NVM_VERSION 0x0005 +#define E1000_I210_NVM_FW_MODULE_PTR 0x0010 +#define E1000_I350_NVM_FW_MODULE_PTR 0x0051 +#define NVM_FUTURE_INIT_WORD1 0x0019 +#define NVM_ETRACK_WORD 0x0042 +#define NVM_ETRACK_HIWORD 0x0043 +#define NVM_COMB_VER_OFF 0x0083 +#define NVM_COMB_VER_PTR 0x003d + +/* NVM version defines */ +#define NVM_MAJOR_MASK 0xF000 +#define NVM_MINOR_MASK 0x0FF0 +#define NVM_IMAGE_ID_MASK 0x000F +#define NVM_COMB_VER_MASK 0x00FF +#define NVM_MAJOR_SHIFT 12 +#define NVM_MINOR_SHIFT 4 +#define NVM_COMB_VER_SHFT 8 +#define NVM_VER_INVALID 0xFFFF +#define NVM_ETRACK_SHIFT 16 +#define NVM_ETRACK_VALID 0x8000 +#define NVM_NEW_DEC_MASK 0x0F00 +#define NVM_HEX_CONV 16 +#define NVM_HEX_TENS 10 + +/* FW version defines */ +/* Offset of "Loader patch ptr" in Firmware Header */ +#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01 +/* Patch generation hour & minutes */ +#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04 +/* Patch generation month & day */ +#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05 +/* Patch generation year */ +#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06 +/* Patch major & minor numbers */ +#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07 + +#define NVM_MAC_ADDR 0x0000 +#define NVM_SUB_DEV_ID 0x000B +#define NVM_SUB_VEN_ID 0x000C +#define NVM_DEV_ID 0x000D +#define NVM_VEN_ID 0x000E +#define NVM_INIT_CTRL_2 0x000F +#define NVM_INIT_CTRL_4 0x0013 +#define NVM_LED_1_CFG 0x001C +#define NVM_LED_0_2_CFG 0x001F + +#define NVM_COMPAT_VALID_CSUM 0x0001 +#define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040 + +#define NVM_ETS_CFG 0x003E +#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0 +#define NVM_ETS_LTHRES_DELTA_SHIFT 6 +#define NVM_ETS_TYPE_MASK 0x0038 +#define NVM_ETS_TYPE_SHIFT 3 +#define NVM_ETS_TYPE_EMC 0x000 +#define NVM_ETS_NUM_SENSORS_MASK 0x0007 +#define NVM_ETS_DATA_LOC_MASK 0x3C00 +#define NVM_ETS_DATA_LOC_SHIFT 10 +#define NVM_ETS_DATA_INDEX_MASK 0x0300 +#define NVM_ETS_DATA_INDEX_SHIFT 8 +#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF +#define NVM_INIT_CONTROL2_REG 0x000F +#define NVM_INIT_CONTROL3_PORT_B 0x0014 +#define NVM_INIT_3GIO_3 0x001A +#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 +#define NVM_INIT_CONTROL3_PORT_A 0x0024 +#define NVM_CFG 0x0012 +#define NVM_ALT_MAC_ADDR_PTR 0x0037 +#define NVM_CHECKSUM_REG 0x003F +#define NVM_COMPATIBILITY_REG_3 0x0003 +#define NVM_COMPATIBILITY_BIT_MASK 0x8000 + +#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ +#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ +#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ +#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ + +#define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0) + +/* Mask bits for fields in Word 0x24 of the NVM */ +#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ +#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */ +/* Offset of Link Mode bits for 82575/82576 */ +#define NVM_WORD24_LNK_MODE_OFFSET 8 +/* Offset of Link Mode bits for 82580 up */ +#define NVM_WORD24_82580_LNK_MODE_OFFSET 4 + + +/* Mask bits for fields in Word 0x0f of the NVM */ +#define NVM_WORD0F_PAUSE_MASK 0x3000 +#define NVM_WORD0F_PAUSE 0x1000 +#define NVM_WORD0F_ASM_DIR 0x2000 + +/* Mask bits for fields in Word 0x1a of the NVM */ +#define NVM_WORD1A_ASPM_MASK 0x000C + +/* Mask bits for fields in Word 0x03 of the EEPROM */ +#define NVM_COMPAT_LOM 0x0800 + +/* length of string needed to store PBA number */ +#define E1000_PBANUM_LENGTH 11 + +/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ +#define NVM_SUM 0xBABA + +/* PBA (printed board assembly) number words */ +#define NVM_PBA_OFFSET_0 8 +#define NVM_PBA_OFFSET_1 9 +#define NVM_PBA_PTR_GUARD 0xFAFA +#define NVM_RESERVED_WORD 0xFFFF +#define NVM_WORD_SIZE_BASE_SHIFT 6 + +/* NVM Commands - SPI */ +#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ +#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ +#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ +#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ +#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ +#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ + +/* SPI NVM Status Register */ +#define NVM_STATUS_RDY_SPI 0x01 + +/* Word definitions for ID LED Settings */ +#define ID_LED_RESERVED_0000 0x0000 +#define ID_LED_RESERVED_FFFF 0xFFFF +#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ + (ID_LED_OFF1_OFF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_DEF1_DEF2)) +#define ID_LED_DEF1_DEF2 0x1 +#define ID_LED_DEF1_ON2 0x2 +#define ID_LED_DEF1_OFF2 0x3 +#define ID_LED_ON1_DEF2 0x4 +#define ID_LED_ON1_ON2 0x5 +#define ID_LED_ON1_OFF2 0x6 +#define ID_LED_OFF1_DEF2 0x7 +#define ID_LED_OFF1_ON2 0x8 +#define ID_LED_OFF1_OFF2 0x9 + +#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF +#define IGP_ACTIVITY_LED_ENABLE 0x0300 +#define IGP_LED3_MODE 0x07000000 + +/* PCI/PCI-X/PCI-EX Config space */ +#define PCI_HEADER_TYPE_REGISTER 0x0E +#define PCIE_LINK_STATUS 0x12 +#define PCIE_DEVICE_CONTROL2 0x28 + +#define PCI_HEADER_TYPE_MULTIFUNC 0x80 +#define PCIE_LINK_WIDTH_MASK 0x3F0 +#define PCIE_LINK_WIDTH_SHIFT 4 +#define PCIE_LINK_SPEED_MASK 0x0F +#define PCIE_LINK_SPEED_2500 0x01 +#define PCIE_LINK_SPEED_5000 0x02 +#define PCIE_DEVICE_CONTROL2_16ms 0x0005 + +#ifndef ETH_ADDR_LEN +#define ETH_ADDR_LEN 6 +#endif + +#define PHY_REVISION_MASK 0xFFFFFFF0 +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ +#define MAX_PHY_MULTI_PAGE_REG 0xF + +/* Bit definitions for valid PHY IDs. + * I = Integrated + * E = External + */ +#define M88E1000_E_PHY_ID 0x01410C50 +#define M88E1000_I_PHY_ID 0x01410C30 +#define M88E1011_I_PHY_ID 0x01410C20 +#define IGP01E1000_I_PHY_ID 0x02A80380 +#define M88E1111_I_PHY_ID 0x01410CC0 +#define M88E1543_E_PHY_ID 0x01410EA0 +#define M88E1112_E_PHY_ID 0x01410C90 +#define I347AT4_E_PHY_ID 0x01410DC0 +#define M88E1340M_E_PHY_ID 0x01410DF0 +#define GG82563_E_PHY_ID 0x01410CA0 +#define IGP03E1000_E_PHY_ID 0x02A80390 +#define IFE_E_PHY_ID 0x02A80330 +#define IFE_PLUS_E_PHY_ID 0x02A80320 +#define IFE_C_E_PHY_ID 0x02A80310 +#define I82580_I_PHY_ID 0x015403A0 +#define I350_I_PHY_ID 0x015403B0 +#define I210_I_PHY_ID 0x01410C00 +#define IGP04E1000_E_PHY_ID 0x02A80391 +#define M88_VENDOR 0x0141 + +/* M88E1000 Specific Registers */ +#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */ +#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */ +#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */ +#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ + +#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */ +#define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */ + +/* M88E1000 PHY Specific Control Register */ +#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */ +/* MDI Crossover Mode bits 6:5 Manual MDI configuration */ +#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 +#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ +/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ +#define M88E1000_PSCR_AUTO_X_1000T 0x0040 +/* Auto crossover enabled all speeds */ +#define M88E1000_PSCR_AUTO_X_MODE 0x0060 +#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */ + +/* M88E1000 PHY Specific Status Register */ +#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ +#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ +#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ +/* 0 = <50M + * 1 = 50-80M + * 2 = 80-110M + * 3 = 110-140M + * 4 = >140M + */ +#define M88E1000_PSSR_CABLE_LENGTH 0x0380 +#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ +#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ +#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ +#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ + +#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 + +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the master + */ +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the slave + */ +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 +#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ + +/* Intel I347AT4 Registers */ +#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ +#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ +#define I347AT4_PAGE_SELECT 0x16 + +/* I347AT4 Extended PHY Specific Control Register */ + +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the master + */ +#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 +#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 +#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 +#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 +#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 +#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 +#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000 +#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 +#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 +#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 + +/* I347AT4 PHY Cable Diagnostics Control */ +#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ + +/* M88E1112 only registers */ +#define M88E1112_VCT_DSP_DISTANCE 0x001A + +/* M88EC018 Rev 2 specific DownShift settings */ +#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 +#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 + +/* Bits... + * 15-5: page + * 4-0: register offset + */ +#define GG82563_PAGE_SHIFT 5 +#define GG82563_REG(page, reg) \ + (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) +#define GG82563_MIN_ALT_REG 30 + +/* GG82563 Specific Registers */ +#define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */ +#define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */ +#define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */ +#define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */ + +/* MAC Specific Control Register */ +#define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21) + +#define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */ + +/* Page 193 - Port Control Registers */ +/* Kumeran Mode Control */ +#define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16) +#define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */ + +/* Page 194 - KMRN Registers */ +#define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */ + +/* MDI Control */ +#define E1000_MDIC_REG_MASK 0x001F0000 +#define E1000_MDIC_REG_SHIFT 16 +#define E1000_MDIC_PHY_MASK 0x03E00000 +#define E1000_MDIC_PHY_SHIFT 21 +#define E1000_MDIC_OP_WRITE 0x04000000 +#define E1000_MDIC_OP_READ 0x08000000 +#define E1000_MDIC_READY 0x10000000 +#define E1000_MDIC_ERROR 0x40000000 +#define E1000_MDIC_DEST 0x80000000 + +/* SerDes Control */ +#define E1000_GEN_CTL_READY 0x80000000 +#define E1000_GEN_CTL_ADDRESS_SHIFT 8 +#define E1000_GEN_POLL_TIMEOUT 640 + +/* LinkSec register fields */ +#define E1000_LSECTXCAP_SUM_MASK 0x00FF0000 +#define E1000_LSECTXCAP_SUM_SHIFT 16 +#define E1000_LSECRXCAP_SUM_MASK 0x00FF0000 +#define E1000_LSECRXCAP_SUM_SHIFT 16 + +#define E1000_LSECTXCTRL_EN_MASK 0x00000003 +#define E1000_LSECTXCTRL_DISABLE 0x0 +#define E1000_LSECTXCTRL_AUTH 0x1 +#define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2 +#define E1000_LSECTXCTRL_AISCI 0x00000020 +#define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 +#define E1000_LSECTXCTRL_RSV_MASK 0x000000D8 + +#define E1000_LSECRXCTRL_EN_MASK 0x0000000C +#define E1000_LSECRXCTRL_EN_SHIFT 2 +#define E1000_LSECRXCTRL_DISABLE 0x0 +#define E1000_LSECRXCTRL_CHECK 0x1 +#define E1000_LSECRXCTRL_STRICT 0x2 +#define E1000_LSECRXCTRL_DROP 0x3 +#define E1000_LSECRXCTRL_PLSH 0x00000040 +#define E1000_LSECRXCTRL_RP 0x00000080 +#define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33 + +/* Tx Rate-Scheduler Config fields */ +#define E1000_RTTBCNRC_RS_ENA 0x80000000 +#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF +#define E1000_RTTBCNRC_RF_INT_SHIFT 14 +#define E1000_RTTBCNRC_RF_INT_MASK \ + (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) + +/* DMA Coalescing register fields */ +/* DMA Coalescing Watchdog Timer */ +#define E1000_DMACR_DMACWT_MASK 0x00003FFF +/* DMA Coalescing Rx Threshold */ +#define E1000_DMACR_DMACTHR_MASK 0x00FF0000 +#define E1000_DMACR_DMACTHR_SHIFT 16 +/* Lx when no PCIe transactions */ +#define E1000_DMACR_DMAC_LX_MASK 0x30000000 +#define E1000_DMACR_DMAC_LX_SHIFT 28 +#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ +/* DMA Coalescing BMC-to-OS Watchdog Enable */ +#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 + +/* DMA Coalescing Transmit Threshold */ +#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF + +#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ + +/* Rx Traffic Rate Threshold */ +#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF +/* Rx packet rate in current window */ +#define E1000_DMCRTRH_LRPRCW 0x80000000 + +/* DMA Coal Rx Traffic Current Count */ +#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF + +/* Flow ctrl Rx Threshold High val */ +#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 +#define E1000_FCRTC_RTH_COAL_SHIFT 4 +/* Lx power decision based on DMA coal */ +#define E1000_PCIEMISC_LX_DECISION 0x00000080 + +#define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ +#define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */ +#define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */ + +/* Proxy Filter Control */ +#define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */ +#define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */ +#define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */ +#define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */ +#define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */ +#define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */ +#define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */ +#define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */ +#define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */ +/* Proxy Status */ +#define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */ + +/* Firmware Status */ +#define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */ +/* VF Control */ +#define E1000_VTCTRL_RST 0x04000000 /* Reset VF */ + +#define E1000_STATUS_LAN_ID_MASK 0x00000000C /* Mask for Lan ID field */ +/* Lan ID bit field offset in status register */ +#define E1000_STATUS_LAN_ID_OFFSET 2 +#define E1000_VFTA_ENTRIES 128 +#ifndef E1000_UNUSEDARG +#define E1000_UNUSEDARG +#endif /* E1000_UNUSEDARG */ +#endif /* _E1000_DEFINES_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h new file mode 100644 index 00000000..347cef71 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h @@ -0,0 +1,793 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_HW_H_ +#define _E1000_HW_H_ + +#include "e1000_osdep.h" +#include "e1000_regs.h" +#include "e1000_defines.h" + +struct e1000_hw; + +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_I350_DA4 0x1546 +#define E1000_DEV_ID_I210_COPPER 0x1533 +#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 +#define E1000_DEV_ID_I210_COPPER_IT 0x1535 +#define E1000_DEV_ID_I210_FIBER 0x1536 +#define E1000_DEV_ID_I210_SERDES 0x1537 +#define E1000_DEV_ID_I210_SGMII 0x1538 +#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B +#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C +#define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 + +#define E1000_REVISION_0 0 +#define E1000_REVISION_1 1 +#define E1000_REVISION_2 2 +#define E1000_REVISION_3 3 +#define E1000_REVISION_4 4 + +#define E1000_FUNC_0 0 +#define E1000_FUNC_1 1 +#define E1000_FUNC_2 2 +#define E1000_FUNC_3 3 + +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 + +enum e1000_mac_type { + e1000_undefined = 0, + e1000_82575, + e1000_82576, + e1000_82580, + e1000_i350, + e1000_i354, + e1000_i210, + e1000_i211, + e1000_num_macs /* List is 1-based, so subtract 1 for true count. */ +}; + +enum e1000_media_type { + e1000_media_type_unknown = 0, + e1000_media_type_copper = 1, + e1000_media_type_fiber = 2, + e1000_media_type_internal_serdes = 3, + e1000_num_media_types +}; + +enum e1000_nvm_type { + e1000_nvm_unknown = 0, + e1000_nvm_none, + e1000_nvm_eeprom_spi, + e1000_nvm_flash_hw, + e1000_nvm_invm, + e1000_nvm_flash_sw +}; + +enum e1000_nvm_override { + e1000_nvm_override_none = 0, + e1000_nvm_override_spi_small, + e1000_nvm_override_spi_large, +}; + +enum e1000_phy_type { + e1000_phy_unknown = 0, + e1000_phy_none, + e1000_phy_m88, + e1000_phy_igp, + e1000_phy_igp_2, + e1000_phy_gg82563, + e1000_phy_igp_3, + e1000_phy_ife, + e1000_phy_82580, + e1000_phy_vf, + e1000_phy_i210, +}; + +enum e1000_bus_type { + e1000_bus_type_unknown = 0, + e1000_bus_type_pci, + e1000_bus_type_pcix, + e1000_bus_type_pci_express, + e1000_bus_type_reserved +}; + +enum e1000_bus_speed { + e1000_bus_speed_unknown = 0, + e1000_bus_speed_33, + e1000_bus_speed_66, + e1000_bus_speed_100, + e1000_bus_speed_120, + e1000_bus_speed_133, + e1000_bus_speed_2500, + e1000_bus_speed_5000, + e1000_bus_speed_reserved +}; + +enum e1000_bus_width { + e1000_bus_width_unknown = 0, + e1000_bus_width_pcie_x1, + e1000_bus_width_pcie_x2, + e1000_bus_width_pcie_x4 = 4, + e1000_bus_width_pcie_x8 = 8, + e1000_bus_width_32, + e1000_bus_width_64, + e1000_bus_width_reserved +}; + +enum e1000_1000t_rx_status { + e1000_1000t_rx_status_not_ok = 0, + e1000_1000t_rx_status_ok, + e1000_1000t_rx_status_undefined = 0xFF +}; + +enum e1000_rev_polarity { + e1000_rev_polarity_normal = 0, + e1000_rev_polarity_reversed, + e1000_rev_polarity_undefined = 0xFF +}; + +enum e1000_fc_mode { + e1000_fc_none = 0, + e1000_fc_rx_pause, + e1000_fc_tx_pause, + e1000_fc_full, + e1000_fc_default = 0xFF +}; + +enum e1000_ms_type { + e1000_ms_hw_default = 0, + e1000_ms_force_master, + e1000_ms_force_slave, + e1000_ms_auto +}; + +enum e1000_smart_speed { + e1000_smart_speed_default = 0, + e1000_smart_speed_on, + e1000_smart_speed_off +}; + +enum e1000_serdes_link_state { + e1000_serdes_link_down = 0, + e1000_serdes_link_autoneg_progress, + e1000_serdes_link_autoneg_complete, + e1000_serdes_link_forced_up +}; + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +/* Receive Descriptor */ +struct e1000_rx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + __le16 length; /* Length of data DMAed into data buffer */ + __le16 csum; /* Packet checksum */ + u8 status; /* Descriptor status */ + u8 errors; /* Descriptor Errors */ + __le16 special; +}; + +/* Receive Descriptor - Extended */ +union e1000_rx_desc_extended { + struct { + __le64 buffer_addr; + __le64 reserved; + } read; + struct { + struct { + __le32 mrq; /* Multiple Rx Queues */ + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define MAX_PS_BUFFERS 4 + +/* Number of packet split data buffers (not including the header buffer) */ +#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) + +/* Receive Descriptor - Packet Split */ +union e1000_rx_desc_packet_split { + struct { + /* one buffer for protocol header(s), three data buffers */ + __le64 buffer_addr[MAX_PS_BUFFERS]; + } read; + struct { + struct { + __le32 mrq; /* Multiple Rx Queues */ + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length0; /* length of buffer 0 */ + __le16 vlan; /* VLAN tag */ + } middle; + struct { + __le16 header_status; + /* length of buffers 1-3 */ + __le16 length[PS_PAGE_BUFFERS]; + } upper; + __le64 reserved; + } wb; /* writeback */ +}; + +/* Transmit Descriptor */ +struct e1000_tx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 cso; /* Checksum offset */ + u8 cmd; /* Descriptor control */ + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 css; /* Checksum start */ + __le16 special; + } fields; + } upper; +}; + +/* Offload Context Descriptor */ +struct e1000_context_desc { + union { + __le32 ip_config; + struct { + u8 ipcss; /* IP checksum start */ + u8 ipcso; /* IP checksum offset */ + __le16 ipcse; /* IP checksum end */ + } ip_fields; + } lower_setup; + union { + __le32 tcp_config; + struct { + u8 tucss; /* TCP checksum start */ + u8 tucso; /* TCP checksum offset */ + __le16 tucse; /* TCP checksum end */ + } tcp_fields; + } upper_setup; + __le32 cmd_and_length; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 hdr_len; /* Header length */ + __le16 mss; /* Maximum segment size */ + } fields; + } tcp_seg_setup; +}; + +/* Offload data descriptor */ +struct e1000_data_desc { + __le64 buffer_addr; /* Address of the descriptor's buffer address */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 typ_len_ext; + u8 cmd; + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 popts; /* Packet Options */ + __le16 special; + } fields; + } upper; +}; + +/* Statistics counters collected by the MAC */ +struct e1000_hw_stats { + u64 crcerrs; + u64 algnerrc; + u64 symerrs; + u64 rxerrc; + u64 mpc; + u64 scc; + u64 ecol; + u64 mcc; + u64 latecol; + u64 colc; + u64 dc; + u64 tncrs; + u64 sec; + u64 cexterr; + u64 rlec; + u64 xonrxc; + u64 xontxc; + u64 xoffrxc; + u64 xofftxc; + u64 fcruc; + u64 prc64; + u64 prc127; + u64 prc255; + u64 prc511; + u64 prc1023; + u64 prc1522; + u64 gprc; + u64 bprc; + u64 mprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 rnbc; + u64 ruc; + u64 rfc; + u64 roc; + u64 rjc; + u64 mgprc; + u64 mgpdc; + u64 mgptc; + u64 tor; + u64 tot; + u64 tpr; + u64 tpt; + u64 ptc64; + u64 ptc127; + u64 ptc255; + u64 ptc511; + u64 ptc1023; + u64 ptc1522; + u64 mptc; + u64 bptc; + u64 tsctc; + u64 tsctfc; + u64 iac; + u64 icrxptc; + u64 icrxatc; + u64 ictxptc; + u64 ictxatc; + u64 ictxqec; + u64 ictxqmtc; + u64 icrxdmtc; + u64 icrxoc; + u64 cbtmpc; + u64 htdpmc; + u64 cbrdpc; + u64 cbrmpc; + u64 rpthc; + u64 hgptc; + u64 htcbdpc; + u64 hgorc; + u64 hgotc; + u64 lenerrs; + u64 scvpc; + u64 hrmpc; + u64 doosync; + u64 o2bgptc; + u64 o2bspc; + u64 b2ospc; + u64 b2ogprc; +}; + + +struct e1000_phy_stats { + u32 idle_errors; + u32 receive_errors; +}; + +struct e1000_host_mng_dhcp_cookie { + u32 signature; + u8 status; + u8 reserved0; + u16 vlan_id; + u32 reserved1; + u16 reserved2; + u8 reserved3; + u8 checksum; +}; + +/* Host Interface "Rev 1" */ +struct e1000_host_command_header { + u8 command_id; + u8 command_length; + u8 command_options; + u8 checksum; +}; + +#define E1000_HI_MAX_DATA_LENGTH 252 +struct e1000_host_command_info { + struct e1000_host_command_header command_header; + u8 command_data[E1000_HI_MAX_DATA_LENGTH]; +}; + +/* Host Interface "Rev 2" */ +struct e1000_host_mng_command_header { + u8 command_id; + u8 checksum; + u16 reserved1; + u16 reserved2; + u16 command_length; +}; + +#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 +struct e1000_host_mng_command_info { + struct e1000_host_mng_command_header command_header; + u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; +}; + +#include "e1000_mac.h" +#include "e1000_phy.h" +#include "e1000_nvm.h" +#include "e1000_manage.h" +#include "e1000_mbx.h" + +/* Function pointers for the MAC. */ +struct e1000_mac_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*id_led_init)(struct e1000_hw *); + s32 (*blink_led)(struct e1000_hw *); + bool (*check_mng_mode)(struct e1000_hw *); + s32 (*check_for_link)(struct e1000_hw *); + s32 (*cleanup_led)(struct e1000_hw *); + void (*clear_hw_cntrs)(struct e1000_hw *); + void (*clear_vfta)(struct e1000_hw *); + s32 (*get_bus_info)(struct e1000_hw *); + void (*set_lan_id)(struct e1000_hw *); + s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); + s32 (*led_on)(struct e1000_hw *); + s32 (*led_off)(struct e1000_hw *); + void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); + s32 (*reset_hw)(struct e1000_hw *); + s32 (*init_hw)(struct e1000_hw *); + void (*shutdown_serdes)(struct e1000_hw *); + void (*power_up_serdes)(struct e1000_hw *); + s32 (*setup_link)(struct e1000_hw *); + s32 (*setup_physical_interface)(struct e1000_hw *); + s32 (*setup_led)(struct e1000_hw *); + void (*write_vfta)(struct e1000_hw *, u32, u32); + void (*config_collision_dist)(struct e1000_hw *); + void (*rar_set)(struct e1000_hw *, u8*, u32); + s32 (*read_mac_addr)(struct e1000_hw *); + s32 (*validate_mdi_setting)(struct e1000_hw *); + s32 (*get_thermal_sensor_data)(struct e1000_hw *); + s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); + s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); + void (*release_swfw_sync)(struct e1000_hw *, u16); +}; + +/* When to use various PHY register access functions: + * + * Func Caller + * Function Does Does When to use + * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * X_reg L,P,A n/a for simple PHY reg accesses + * X_reg_locked P,A L for multiple accesses of different regs + * on different pages + * X_reg_page A L,P for multiple accesses of different regs + * on the same page + * + * Where X=[read|write], L=locking, P=sets page, A=register access + * + */ +struct e1000_phy_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*acquire)(struct e1000_hw *); + s32 (*check_polarity)(struct e1000_hw *); + s32 (*check_reset_block)(struct e1000_hw *); + s32 (*commit)(struct e1000_hw *); + s32 (*force_speed_duplex)(struct e1000_hw *); + s32 (*get_cfg_done)(struct e1000_hw *hw); + s32 (*get_cable_length)(struct e1000_hw *); + s32 (*get_info)(struct e1000_hw *); + s32 (*set_page)(struct e1000_hw *, u16); + s32 (*read_reg)(struct e1000_hw *, u32, u16 *); + s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); + s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); + void (*release)(struct e1000_hw *); + s32 (*reset)(struct e1000_hw *); + s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); + s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); + s32 (*write_reg)(struct e1000_hw *, u32, u16); + s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); + s32 (*write_reg_page)(struct e1000_hw *, u32, u16); + void (*power_up)(struct e1000_hw *); + void (*power_down)(struct e1000_hw *); + s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); + s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); +}; + +/* Function pointers for the NVM. */ +struct e1000_nvm_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*acquire)(struct e1000_hw *); + s32 (*read)(struct e1000_hw *, u16, u16, u16 *); + void (*release)(struct e1000_hw *); + void (*reload)(struct e1000_hw *); + s32 (*update)(struct e1000_hw *); + s32 (*valid_led_default)(struct e1000_hw *, u16 *); + s32 (*validate)(struct e1000_hw *); + s32 (*write)(struct e1000_hw *, u16, u16, u16 *); +}; + +#define E1000_MAX_SENSORS 3 + +struct e1000_thermal_diode_data { + u8 location; + u8 temp; + u8 caution_thresh; + u8 max_op_thresh; +}; + +struct e1000_thermal_sensor_data { + struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS]; +}; + +struct e1000_mac_info { + struct e1000_mac_operations ops; + u8 addr[ETH_ADDR_LEN]; + u8 perm_addr[ETH_ADDR_LEN]; + + enum e1000_mac_type type; + + u32 collision_delta; + u32 ledctl_default; + u32 ledctl_mode1; + u32 ledctl_mode2; + u32 mc_filter_type; + u32 tx_packet_delta; + u32 txcw; + + u16 current_ifs_val; + u16 ifs_max_val; + u16 ifs_min_val; + u16 ifs_ratio; + u16 ifs_step_size; + u16 mta_reg_count; + u16 uta_reg_count; + + /* Maximum size of the MTA register table in all supported adapters */ + #define MAX_MTA_REG 128 + u32 mta_shadow[MAX_MTA_REG]; + u16 rar_entry_count; + + u8 forced_speed_duplex; + + bool adaptive_ifs; + bool has_fwsm; + bool arc_subsystem_valid; + bool asf_firmware_present; + bool autoneg; + bool autoneg_failed; + bool get_link_status; + bool in_ifs_mode; + enum e1000_serdes_link_state serdes_link_state; + bool serdes_has_link; + bool tx_pkt_filtering; + struct e1000_thermal_sensor_data thermal_sensor_data; +}; + +struct e1000_phy_info { + struct e1000_phy_operations ops; + enum e1000_phy_type type; + + enum e1000_1000t_rx_status local_rx; + enum e1000_1000t_rx_status remote_rx; + enum e1000_ms_type ms_type; + enum e1000_ms_type original_ms_type; + enum e1000_rev_polarity cable_polarity; + enum e1000_smart_speed smart_speed; + + u32 addr; + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; + + enum e1000_media_type media_type; + + u16 autoneg_advertised; + u16 autoneg_mask; + u16 cable_length; + u16 max_cable_length; + u16 min_cable_length; + + u8 mdix; + + bool disable_polarity_correction; + bool is_mdix; + bool polarity_correction; + bool reset_disable; + bool speed_downgraded; + bool autoneg_wait_to_complete; +}; + +struct e1000_nvm_info { + struct e1000_nvm_operations ops; + enum e1000_nvm_type type; + enum e1000_nvm_override override; + + u32 flash_bank_size; + u32 flash_base_addr; + + u16 word_size; + u16 delay_usec; + u16 address_bits; + u16 opcode_bits; + u16 page_size; +}; + +struct e1000_bus_info { + enum e1000_bus_type type; + enum e1000_bus_speed speed; + enum e1000_bus_width width; + + u16 func; + u16 pci_cmd_word; +}; + +struct e1000_fc_info { + u32 high_water; /* Flow control high-water mark */ + u32 low_water; /* Flow control low-water mark */ + u16 pause_time; /* Flow control pause timer */ + u16 refresh_time; /* Flow control refresh timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + enum e1000_fc_mode current_mode; /* FC mode in effect */ + enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ +}; + +struct e1000_mbx_operations { + s32 (*init_params)(struct e1000_hw *hw); + s32 (*read)(struct e1000_hw *, u32 *, u16, u16); + s32 (*write)(struct e1000_hw *, u32 *, u16, u16); + s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); + s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); + s32 (*check_for_msg)(struct e1000_hw *, u16); + s32 (*check_for_ack)(struct e1000_hw *, u16); + s32 (*check_for_rst)(struct e1000_hw *, u16); +}; + +struct e1000_mbx_stats { + u32 msgs_tx; + u32 msgs_rx; + + u32 acks; + u32 reqs; + u32 rsts; +}; + +struct e1000_mbx_info { + struct e1000_mbx_operations ops; + struct e1000_mbx_stats stats; + u32 timeout; + u32 usec_delay; + u16 size; +}; + +struct e1000_dev_spec_82575 { + bool sgmii_active; + bool global_device_reset; + bool eee_disable; + bool module_plugged; + bool clear_semaphore_once; + u32 mtu; + struct sfp_e1000_flags eth_flags; + u8 media_port; + bool media_changed; +}; + +struct e1000_dev_spec_vf { + u32 vf_number; + u32 v2p_mailbox; +}; + +struct e1000_hw { + void *back; + + u8 __iomem *hw_addr; + u8 __iomem *flash_address; + unsigned long io_base; + + struct e1000_mac_info mac; + struct e1000_fc_info fc; + struct e1000_phy_info phy; + struct e1000_nvm_info nvm; + struct e1000_bus_info bus; + struct e1000_mbx_info mbx; + struct e1000_host_mng_dhcp_cookie mng_cookie; + + union { + struct e1000_dev_spec_82575 _82575; + struct e1000_dev_spec_vf vf; + } dev_spec; + + u16 device_id; + u16 subsystem_vendor_id; + u16 subsystem_device_id; + u16 vendor_id; + + u8 revision_id; +}; + +#include "e1000_82575.h" +#include "e1000_i210.h" + +/* These functions must be implemented by drivers */ +s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); +s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c new file mode 100644 index 00000000..1e9f3e6e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c @@ -0,0 +1,909 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + + +static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); +static void e1000_release_nvm_i210(struct e1000_hw *hw); +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); +static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); +static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data); + +/** + * e1000_acquire_nvm_i210 - Request for access to EEPROM + * @hw: pointer to the HW structure + * + * Acquire the necessary semaphores for exclusive access to the EEPROM. + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_acquire_nvm_i210"); + + ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); + + return ret_val; +} + +/** + * e1000_release_nvm_i210 - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit, + * then release the semaphores acquired. + **/ +static void e1000_release_nvm_i210(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_release_nvm_i210"); + + e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + + DEBUGFUNC("e1000_acquire_swfw_sync_i210"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_i210(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_i210 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + **/ +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_i210"); + + while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); +} + +/** + * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_i210"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = false; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } + + /* If we do not have the semaphore here, we have to give up. */ + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the Shadow Ram to read + * @words: number of words to read + * @data: word read from the Shadow Ram + * + * Reads a 16 bit word from the Shadow Ram using the EERD register. + * Uses necessary synchronization semaphores. + **/ +s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + s32 status = E1000_SUCCESS; + u16 i, count; + + DEBUGFUNC("e1000_read_nvm_srrd_i210"); + + /* We cannot hold synchronization semaphores for too long, + * because of forceful takeover procedure. However it is more efficient + * to read in bursts than synchronizing access for each word. */ + for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { + count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? + E1000_EERD_EEWR_MAX_COUNT : (words - i); + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + status = e1000_read_nvm_eerd(hw, offset, count, + data + i); + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + if (status != E1000_SUCCESS) + break; + } + + return status; +} + +/** + * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR + * @hw: pointer to the HW structure + * @offset: offset within the Shadow RAM to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the Shadow RAM + * + * Writes data to Shadow RAM at offset using EEWR register. + * + * If e1000_update_nvm_checksum is not called after this function , the + * data will not be committed to FLASH and also Shadow RAM will most likely + * contain an invalid checksum. + * + * If error code is returned, data and Shadow RAM may be inconsistent - buffer + * partially written. + **/ +s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + s32 status = E1000_SUCCESS; + u16 i, count; + + DEBUGFUNC("e1000_write_nvm_srwr_i210"); + + /* We cannot hold synchronization semaphores for too long, + * because of forceful takeover procedure. However it is more efficient + * to write in bursts than synchronizing access for each word. */ + for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { + count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? + E1000_EERD_EEWR_MAX_COUNT : (words - i); + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + status = e1000_write_nvm_srwr(hw, offset, count, + data + i); + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + if (status != E1000_SUCCESS) + break; + } + + return status; +} + +/** + * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR + * @hw: pointer to the HW structure + * @offset: offset within the Shadow Ram to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the Shadow Ram + * + * Writes data to Shadow Ram at offset using EEWR register. + * + * If e1000_update_nvm_checksum is not called after this function , the + * Shadow Ram will most likely contain an invalid checksum. + **/ +static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i, k, eewr = 0; + u32 attempts = 100000; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_nvm_srwr"); + + /* + * A check for invalid values: offset too large, too many words, + * too many words for the offset, and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + + for (i = 0; i < words; i++) { + eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | + (data[i] << E1000_NVM_RW_REG_DATA) | + E1000_NVM_RW_REG_START; + + E1000_WRITE_REG(hw, E1000_SRWR, eewr); + + for (k = 0; k < attempts; k++) { + if (E1000_NVM_RW_REG_DONE & + E1000_READ_REG(hw, E1000_SRWR)) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(5); + } + + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Shadow RAM write EEWR timed out\n"); + break; + } + } + +out: + return ret_val; +} + +/** e1000_read_invm_word_i210 - Reads OTP + * @hw: pointer to the HW structure + * @address: the word address (aka eeprom offset) to read + * @data: pointer to the data read + * + * Reads 16-bit words from the OTP. Return error when the word is not + * stored in OTP. + **/ +static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) +{ + s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; + u32 invm_dword; + u16 i; + u8 record_type, word_address; + + DEBUGFUNC("e1000_read_invm_word_i210"); + + for (i = 0; i < E1000_INVM_SIZE; i++) { + invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); + /* Get record type */ + record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); + if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) + break; + if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) + i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; + if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) + i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; + if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { + word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); + if (word_address == address) { + *data = INVM_DWORD_TO_WORD_DATA(invm_dword); + DEBUGOUT2("Read INVM Word 0x%02x = %x", + address, *data); + status = E1000_SUCCESS; + break; + } + } + } + if (status != E1000_SUCCESS) + DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address); + return status; +} + +/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211 + * @hw: pointer to the HW structure + * @address: the word address (aka eeprom offset) to read + * @data: pointer to the data read + * + * Wrapper function to return data formerly found in the NVM. + **/ +static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset, + u16 E1000_UNUSEDARG words, u16 *data) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_invm_i210"); + + /* Only the MAC addr is required to be present in the iNVM */ + switch (offset) { + case NVM_MAC_ADDR: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]); + ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1, + &data[1]); + ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2, + &data[2]); + if (ret_val != E1000_SUCCESS) + DEBUGOUT("MAC Addr not found in iNVM\n"); + break; + case NVM_INIT_CTRL_2: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_INIT_CTRL_2_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_INIT_CTRL_4: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_INIT_CTRL_4_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_LED_1_CFG: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_LED_1_CFG_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_LED_0_2_CFG: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_LED_0_2_CFG_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_ID_LED_SETTINGS: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = ID_LED_RESERVED_FFFF; + ret_val = E1000_SUCCESS; + } + break; + case NVM_SUB_DEV_ID: + *data = hw->subsystem_device_id; + break; + case NVM_SUB_VEN_ID: + *data = hw->subsystem_vendor_id; + break; + case NVM_DEV_ID: + *data = hw->device_id; + break; + case NVM_VEN_ID: + *data = hw->vendor_id; + break; + default: + DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset); + *data = NVM_RESERVED_WORD; + break; + } + return ret_val; +} + +/** + * e1000_read_invm_version - Reads iNVM version and image type + * @hw: pointer to the HW structure + * @invm_ver: version structure for the version read + * + * Reads iNVM version and image type. + **/ +s32 e1000_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver) +{ + u32 *record = NULL; + u32 *next_record = NULL; + u32 i = 0; + u32 invm_dword = 0; + u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / + E1000_INVM_RECORD_SIZE_IN_BYTES); + u32 buffer[E1000_INVM_SIZE]; + s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; + u16 version = 0; + + DEBUGFUNC("e1000_read_invm_version"); + + /* Read iNVM memory */ + for (i = 0; i < E1000_INVM_SIZE; i++) { + invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); + buffer[i] = invm_dword; + } + + /* Read version number */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have first version location used */ + if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { + version = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have second version location used */ + else if ((i == 1) && + ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + /* + * Check if we have odd version location + * used and it is the last one used + */ + else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && + ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && + (i != 1))) { + version = (*next_record & E1000_INVM_VER_FIELD_TWO) + >> 13; + status = E1000_SUCCESS; + break; + } + /* + * Check if we have even version location + * used and it is the last one used + */ + else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && + ((*record & 0x3) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + } + + if (status == E1000_SUCCESS) { + invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) + >> E1000_INVM_MAJOR_SHIFT; + invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; + } + /* Read Image Type */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have image type in first location used */ + if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { + invm_ver->invm_img_type = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have image type in first location used */ + else if ((((*record & 0x3) == 0) && + ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || + ((((*record & 0x3) != 0) && (i != 1)))) { + invm_ver->invm_img_type = + (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; + status = E1000_SUCCESS; + break; + } + } + return status; +} + +/** + * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); + + DEBUGFUNC("e1000_validate_nvm_checksum_i210"); + + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + + /* + * Replace the read function with semaphore grabbing with + * the one that skips this for a while. + * We have semaphore taken already here. + */ + read_op_ptr = hw->nvm.ops.read; + hw->nvm.ops.read = e1000_read_nvm_eerd; + + status = e1000_validate_nvm_checksum_generic(hw); + + /* Revert original read operation. */ + hw->nvm.ops.read = read_op_ptr; + + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + return status; +} + + +/** + * e1000_update_nvm_checksum_i210 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. Next commit EEPROM data onto the Flash. + **/ +s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum_i210"); + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("EEPROM read failed\n"); + goto out; + } + + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + /* + * Do not use hw->nvm.ops.write, hw->nvm.ops.read + * because we do not want to take the synchronization + * semaphores twice here. + */ + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data); + if (ret_val) { + hw->nvm.ops.release(hw); + DEBUGOUT("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, + &checksum); + if (ret_val != E1000_SUCCESS) { + hw->nvm.ops.release(hw); + DEBUGOUT("NVM Write Error while updating checksum.\n"); + goto out; + } + + hw->nvm.ops.release(hw); + + ret_val = e1000_update_flash_i210(hw); + } else { + ret_val = E1000_ERR_SWFW_SYNC; + } +out: + return ret_val; +} + +/** + * e1000_get_flash_presence_i210 - Check if flash device is detected. + * @hw: pointer to the HW structure + * + **/ +bool e1000_get_flash_presence_i210(struct e1000_hw *hw) +{ + u32 eec = 0; + bool ret_val = false; + + DEBUGFUNC("e1000_get_flash_presence_i210"); + + eec = E1000_READ_REG(hw, E1000_EECD); + + if (eec & E1000_EECD_FLASH_DETECTED_I210) + ret_val = true; + + return ret_val; +} + +/** + * e1000_update_flash_i210 - Commit EEPROM to the flash + * @hw: pointer to the HW structure + * + **/ +s32 e1000_update_flash_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 flup; + + DEBUGFUNC("e1000_update_flash_i210"); + + ret_val = e1000_pool_flash_update_done_i210(hw); + if (ret_val == -E1000_ERR_NVM) { + DEBUGOUT("Flash update time out\n"); + goto out; + } + + flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210; + E1000_WRITE_REG(hw, E1000_EECD, flup); + + ret_val = e1000_pool_flash_update_done_i210(hw); + if (ret_val == E1000_SUCCESS) + DEBUGOUT("Flash update complete\n"); + else + DEBUGOUT("Flash update time out\n"); + +out: + return ret_val; +} + +/** + * e1000_pool_flash_update_done_i210 - Pool FLUDONE status. + * @hw: pointer to the HW structure + * + **/ +s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw) +{ + s32 ret_val = -E1000_ERR_NVM; + u32 i, reg; + + DEBUGFUNC("e1000_pool_flash_update_done_i210"); + + for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { + reg = E1000_READ_REG(hw, E1000_EECD); + if (reg & E1000_EECD_FLUDONE_I210) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(5); + } + + return ret_val; +} + +/** + * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers + * @hw: pointer to the HW structure + * + * Initialize the i210/i211 NVM parameters and function pointers. + **/ +static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + struct e1000_nvm_info *nvm = &hw->nvm; + + DEBUGFUNC("e1000_init_nvm_params_i210"); + + ret_val = e1000_init_nvm_params_82575(hw); + nvm->ops.acquire = e1000_acquire_nvm_i210; + nvm->ops.release = e1000_release_nvm_i210; + nvm->ops.valid_led_default = e1000_valid_led_default_i210; + if (e1000_get_flash_presence_i210(hw)) { + hw->nvm.type = e1000_nvm_flash_hw; + nvm->ops.read = e1000_read_nvm_srrd_i210; + nvm->ops.write = e1000_write_nvm_srwr_i210; + nvm->ops.validate = e1000_validate_nvm_checksum_i210; + nvm->ops.update = e1000_update_nvm_checksum_i210; + } else { + hw->nvm.type = e1000_nvm_invm; + nvm->ops.read = e1000_read_invm_i210; + nvm->ops.write = e1000_null_write_nvm; + nvm->ops.validate = e1000_null_ops_generic; + nvm->ops.update = e1000_null_ops_generic; + } + return ret_val; +} + +/** + * e1000_init_function_pointers_i210 - Init func ptrs. + * @hw: pointer to the HW structure + * + * Called to initialize all function pointers and parameters. + **/ +void e1000_init_function_pointers_i210(struct e1000_hw *hw) +{ + e1000_init_function_pointers_82575(hw); + hw->nvm.ops.init_params = e1000_init_nvm_params_i210; + + return; +} + +/** + * e1000_valid_led_default_i210 - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_i210"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { + switch (hw->phy.media_type) { + case e1000_media_type_internal_serdes: + *data = ID_LED_DEFAULT_I210_SERDES; + break; + case e1000_media_type_copper: + default: + *data = ID_LED_DEFAULT_I210; + break; + } + } +out: + return ret_val; +} + +/** + * __e1000_access_xmdio_reg - Read/write XMDIO register + * @hw: pointer to the HW structure + * @address: XMDIO address to program + * @dev_addr: device address to program + * @data: pointer to value to read/write from/to the XMDIO address + * @read: boolean flag to indicate read or write + **/ +static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address, + u8 dev_addr, u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_access_xmdio_reg"); + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | + dev_addr); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); + if (ret_val) + return ret_val; + + /* Recalibrate the device back to 0 */ + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); + if (ret_val) + return ret_val; + + return ret_val; +} + +/** + * e1000_read_xmdio_reg - Read XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be read from the EMI address + **/ +s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) +{ + DEBUGFUNC("e1000_read_xmdio_reg"); + + return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true); +} + +/** + * e1000_write_xmdio_reg - Write XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be written to the XMDIO address + **/ +s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) +{ + DEBUGFUNC("e1000_read_xmdio_reg"); + + return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h new file mode 100644 index 00000000..57b2eb56 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h @@ -0,0 +1,91 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_I210_H_ +#define _E1000_I210_H_ + +bool e1000_get_flash_presence_i210(struct e1000_hw *hw); +s32 e1000_update_flash_i210(struct e1000_hw *hw); +s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw); +s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw); +s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, + u16 words, u16 *data); +s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, + u16 words, u16 *data); +s32 e1000_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver); +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); +s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 *data); +s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 data); + +#define E1000_STM_OPCODE 0xDB00 +#define E1000_EEPROM_FLASH_SIZE_WORD 0x11 + +#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ + (u8)((invm_dword) & 0x7) +#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ + (u8)(((invm_dword) & 0x0000FE00) >> 9) +#define INVM_DWORD_TO_WORD_DATA(invm_dword) \ + (u16)(((invm_dword) & 0xFFFF0000) >> 16) + +enum E1000_INVM_STRUCTURE_TYPE { + E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, + E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, + E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, + E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, + E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, + E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, +}; + +#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 +#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 +#define E1000_INVM_ULT_BYTES_SIZE 8 +#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 +#define E1000_INVM_VER_FIELD_ONE 0x1FF8 +#define E1000_INVM_VER_FIELD_TWO 0x7FE000 +#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 + +#define E1000_INVM_MAJOR_MASK 0x3F0 +#define E1000_INVM_MINOR_MASK 0xF +#define E1000_INVM_MAJOR_SHIFT 4 + +#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_OFF2)) +#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_ON2)) + +/* NVM offset defaults for I211 devices */ +#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 +#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 +#define NVM_LED_1_CFG_DEFAULT_I211 0x0184 +#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c new file mode 100644 index 00000000..4ee59ba9 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c @@ -0,0 +1,2096 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); +static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); +static void e1000_config_collision_dist_generic(struct e1000_hw *hw); +static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); + +/** + * e1000_init_mac_ops_generic - Initialize MAC function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_mac_ops_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + DEBUGFUNC("e1000_init_mac_ops_generic"); + + /* General Setup */ + mac->ops.init_params = e1000_null_ops_generic; + mac->ops.init_hw = e1000_null_ops_generic; + mac->ops.reset_hw = e1000_null_ops_generic; + mac->ops.setup_physical_interface = e1000_null_ops_generic; + mac->ops.get_bus_info = e1000_null_ops_generic; + mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; + mac->ops.read_mac_addr = e1000_read_mac_addr_generic; + mac->ops.config_collision_dist = e1000_config_collision_dist_generic; + mac->ops.clear_hw_cntrs = e1000_null_mac_generic; + /* LED */ + mac->ops.cleanup_led = e1000_null_ops_generic; + mac->ops.setup_led = e1000_null_ops_generic; + mac->ops.blink_led = e1000_null_ops_generic; + mac->ops.led_on = e1000_null_ops_generic; + mac->ops.led_off = e1000_null_ops_generic; + /* LINK */ + mac->ops.setup_link = e1000_null_ops_generic; + mac->ops.get_link_up_info = e1000_null_link_info; + mac->ops.check_for_link = e1000_null_ops_generic; + /* Management */ + mac->ops.check_mng_mode = e1000_null_mng_mode; + /* VLAN, MC, etc. */ + mac->ops.update_mc_addr_list = e1000_null_update_mc; + mac->ops.clear_vfta = e1000_null_mac_generic; + mac->ops.write_vfta = e1000_null_write_vfta; + mac->ops.rar_set = e1000_rar_set_generic; + mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic; +} + +/** + * e1000_null_ops_generic - No-op function, returns 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_ops_generic"); + return E1000_SUCCESS; +} + +/** + * e1000_null_mac_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_mac_generic"); + return; +} + +/** + * e1000_null_link_info - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d) +{ + DEBUGFUNC("e1000_null_link_info"); + return E1000_SUCCESS; +} + +/** + * e1000_null_mng_mode - No-op function, return false + * @hw: pointer to the HW structure + **/ +bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_mng_mode"); + return false; +} + +/** + * e1000_null_update_mc - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) +{ + DEBUGFUNC("e1000_null_update_mc"); + return; +} + +/** + * e1000_null_write_vfta - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b) +{ + DEBUGFUNC("e1000_null_write_vfta"); + return; +} + +/** + * e1000_null_rar_set - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) +{ + DEBUGFUNC("e1000_null_rar_set"); + return; +} + +/** + * e1000_get_bus_info_pcie_generic - Get PCIe bus information + * @hw: pointer to the HW structure + * + * Determines and stores the system bus information for a particular + * network interface. The following bus information is determined and stored: + * bus speed, bus width, type (PCIe), and PCIe function. + **/ +s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_bus_info *bus = &hw->bus; + s32 ret_val; + u16 pcie_link_status; + + DEBUGFUNC("e1000_get_bus_info_pcie_generic"); + + bus->type = e1000_bus_type_pci_express; + + ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, + &pcie_link_status); + if (ret_val) { + bus->width = e1000_bus_width_unknown; + bus->speed = e1000_bus_speed_unknown; + } else { + switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { + case PCIE_LINK_SPEED_2500: + bus->speed = e1000_bus_speed_2500; + break; + case PCIE_LINK_SPEED_5000: + bus->speed = e1000_bus_speed_5000; + break; + default: + bus->speed = e1000_bus_speed_unknown; + break; + } + + bus->width = (enum e1000_bus_width)((pcie_link_status & + PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); + } + + mac->ops.set_lan_id(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices + * + * @hw: pointer to the HW structure + * + * Determines the LAN function id by reading memory-mapped registers + * and swaps the port value if requested. + **/ +static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + u32 reg; + + /* The status register reports the correct function number + * for the device regardless of function swap state. + */ + reg = E1000_READ_REG(hw, E1000_STATUS); + bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; +} + +/** + * e1000_set_lan_id_single_port - Set LAN id for a single port device + * @hw: pointer to the HW structure + * + * Sets the LAN function id to zero for a single port device. + **/ +void e1000_set_lan_id_single_port(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + + bus->func = 0; +} + +/** + * e1000_clear_vfta_generic - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * Clears the register array which contains the VLAN filter table by + * setting all the values to 0. + **/ +void e1000_clear_vfta_generic(struct e1000_hw *hw) +{ + u32 offset; + + DEBUGFUNC("e1000_clear_vfta_generic"); + + for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); + E1000_WRITE_FLUSH(hw); + } +} + +/** + * e1000_write_vfta_generic - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) +{ + DEBUGFUNC("e1000_write_vfta_generic"); + + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_init_rx_addrs_generic - Initialize receive address's + * @hw: pointer to the HW structure + * @rar_count: receive address registers + * + * Setup the receive address registers by setting the base receive address + * register to the devices MAC address and clearing all the other receive + * address registers to 0. + **/ +void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) +{ + u32 i; + u8 mac_addr[ETH_ADDR_LEN] = {0}; + + DEBUGFUNC("e1000_init_rx_addrs_generic"); + + /* Setup the receive address */ + DEBUGOUT("Programming MAC Address into RAR[0]\n"); + + hw->mac.ops.rar_set(hw, hw->mac.addr, 0); + + /* Zero out the other (rar_entry_count - 1) receive addresses */ + DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); + for (i = 1; i < rar_count; i++) + hw->mac.ops.rar_set(hw, mac_addr, i); +} + +/** + * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr + * @hw: pointer to the HW structure + * + * Checks the nvm for an alternate MAC address. An alternate MAC address + * can be setup by pre-boot software and must be treated like a permanent + * address and must override the actual permanent MAC address. If an + * alternate MAC address is found it is programmed into RAR0, replacing + * the permanent address that was installed into RAR0 by the Si on reset. + * This function will return SUCCESS unless it encounters an error while + * reading the EEPROM. + **/ +s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) +{ + u32 i; + s32 ret_val; + u16 offset, nvm_alt_mac_addr_offset, nvm_data; + u8 alt_mac_addr[ETH_ADDR_LEN]; + + DEBUGFUNC("e1000_check_alt_mac_addr_generic"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data); + if (ret_val) + return ret_val; + + + /* Alternate MAC address is handled by the option ROM for 82580 + * and newer. SW support not required. + */ + if (hw->mac.type >= e1000_82580) + return E1000_SUCCESS; + + ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, + &nvm_alt_mac_addr_offset); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if ((nvm_alt_mac_addr_offset == 0xFFFF) || + (nvm_alt_mac_addr_offset == 0x0000)) + /* There is no Alternate MAC Address */ + return E1000_SUCCESS; + + if (hw->bus.func == E1000_FUNC_1) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; + if (hw->bus.func == E1000_FUNC_2) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; + + if (hw->bus.func == E1000_FUNC_3) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; + for (i = 0; i < ETH_ADDR_LEN; i += 2) { + offset = nvm_alt_mac_addr_offset + (i >> 1); + ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + alt_mac_addr[i] = (u8)(nvm_data & 0xFF); + alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); + } + + /* if multicast bit is set, the alternate address will not be used */ + if (alt_mac_addr[0] & 0x01) { + DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); + return E1000_SUCCESS; + } + + /* We have a valid alternate MAC address, and we want to treat it the + * same as the normal permanent MAC address stored by the HW into the + * RAR. Do this by mapping this address into RAR0. + */ + hw->mac.ops.rar_set(hw, alt_mac_addr, 0); + + return E1000_SUCCESS; +} + +/** + * e1000_rar_set_generic - Set receive address register + * @hw: pointer to the HW structure + * @addr: pointer to the receive address + * @index: receive address array register + * + * Sets the receive address array register at index to the address passed + * in by addr. + **/ +static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) +{ + u32 rar_low, rar_high; + + DEBUGFUNC("e1000_rar_set_generic"); + + /* HW expects these in little endian so we reverse the byte order + * from network order (big endian) to little endian + */ + rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + + rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + + /* If MAC address zero, no need to set the AV bit */ + if (rar_low || rar_high) + rar_high |= E1000_RAH_AV; + + /* Some bridges will combine consecutive 32-bit writes into + * a single burst write, which will malfunction on some parts. + * The flushes avoid this. + */ + E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); + E1000_WRITE_FLUSH(hw); + E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_hash_mc_addr_generic - Generate a multicast hash value + * @hw: pointer to the HW structure + * @mc_addr: pointer to a multicast address + * + * Generates a multicast address hash value which is used to determine + * the multicast filter table array address and new table value. + **/ +u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) +{ + u32 hash_value, hash_mask; + u8 bit_shift = 0; + + DEBUGFUNC("e1000_hash_mc_addr_generic"); + + /* Register count multiplied by bits per register */ + hash_mask = (hw->mac.mta_reg_count * 32) - 1; + + /* For a mc_filter_type of 0, bit_shift is the number of left-shifts + * where 0xFF would still fall within the hash mask. + */ + while (hash_mask >> bit_shift != 0xFF) + bit_shift++; + + /* The portion of the address that is used for the hash table + * is determined by the mc_filter_type setting. + * The algorithm is such that there is a total of 8 bits of shifting. + * The bit_shift for a mc_filter_type of 0 represents the number of + * left-shifts where the MSB of mc_addr[5] would still fall within + * the hash_mask. Case 0 does this exactly. Since there are a total + * of 8 bits of shifting, then mc_addr[4] will shift right the + * remaining number of bits. Thus 8 - bit_shift. The rest of the + * cases are a variation of this algorithm...essentially raising the + * number of bits to shift mc_addr[5] left, while still keeping the + * 8-bit shifting total. + * + * For example, given the following Destination MAC Address and an + * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), + * we can see that the bit_shift for case 0 is 4. These are the hash + * values resulting from each mc_filter_type... + * [0] [1] [2] [3] [4] [5] + * 01 AA 00 12 34 56 + * LSB MSB + * + * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 + * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 + * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 + * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 + */ + switch (hw->mac.mc_filter_type) { + default: + case 0: + break; + case 1: + bit_shift += 1; + break; + case 2: + bit_shift += 2; + break; + case 3: + bit_shift += 4; + break; + } + + hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | + (((u16) mc_addr[5]) << bit_shift))); + + return hash_value; +} + +/** + * e1000_update_mc_addr_list_generic - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates entire Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count) +{ + u32 hash_value, hash_bit, hash_reg; + int i; + + DEBUGFUNC("e1000_update_mc_addr_list_generic"); + + /* clear mta_shadow */ + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + + /* update mta_shadow from mc_addr_list */ + for (i = 0; (u32) i < mc_addr_count; i++) { + hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); + + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); + mc_addr_list += (ETH_ADDR_LEN); + } + + /* replace the entire MTA table */ + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters + * @hw: pointer to the HW structure + * + * Clears the base hardware counters by reading the counter registers. + **/ +void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); + + E1000_READ_REG(hw, E1000_CRCERRS); + E1000_READ_REG(hw, E1000_SYMERRS); + E1000_READ_REG(hw, E1000_MPC); + E1000_READ_REG(hw, E1000_SCC); + E1000_READ_REG(hw, E1000_ECOL); + E1000_READ_REG(hw, E1000_MCC); + E1000_READ_REG(hw, E1000_LATECOL); + E1000_READ_REG(hw, E1000_COLC); + E1000_READ_REG(hw, E1000_DC); + E1000_READ_REG(hw, E1000_SEC); + E1000_READ_REG(hw, E1000_RLEC); + E1000_READ_REG(hw, E1000_XONRXC); + E1000_READ_REG(hw, E1000_XONTXC); + E1000_READ_REG(hw, E1000_XOFFRXC); + E1000_READ_REG(hw, E1000_XOFFTXC); + E1000_READ_REG(hw, E1000_FCRUC); + E1000_READ_REG(hw, E1000_GPRC); + E1000_READ_REG(hw, E1000_BPRC); + E1000_READ_REG(hw, E1000_MPRC); + E1000_READ_REG(hw, E1000_GPTC); + E1000_READ_REG(hw, E1000_GORCL); + E1000_READ_REG(hw, E1000_GORCH); + E1000_READ_REG(hw, E1000_GOTCL); + E1000_READ_REG(hw, E1000_GOTCH); + E1000_READ_REG(hw, E1000_RNBC); + E1000_READ_REG(hw, E1000_RUC); + E1000_READ_REG(hw, E1000_RFC); + E1000_READ_REG(hw, E1000_ROC); + E1000_READ_REG(hw, E1000_RJC); + E1000_READ_REG(hw, E1000_TORL); + E1000_READ_REG(hw, E1000_TORH); + E1000_READ_REG(hw, E1000_TOTL); + E1000_READ_REG(hw, E1000_TOTH); + E1000_READ_REG(hw, E1000_TPR); + E1000_READ_REG(hw, E1000_TPT); + E1000_READ_REG(hw, E1000_MPTC); + E1000_READ_REG(hw, E1000_BPTC); +} + +/** + * e1000_check_for_copper_link_generic - Check for link (Copper) + * @hw: pointer to the HW structure + * + * Checks to see of the link status of the hardware has changed. If a + * change in link status has been detected, then we read the PHY registers + * to get the current speed/duplex if link exists. + **/ +s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + bool link; + + DEBUGFUNC("e1000_check_for_copper_link"); + + /* We only want to go out to the PHY registers to see if Auto-Neg + * has completed and/or if our link status has changed. The + * get_link_status flag is set upon receiving a Link Status + * Change or Rx Sequence Error interrupt. + */ + if (!mac->get_link_status) + return E1000_SUCCESS; + + /* First we want to see if the MII Status Register reports + * link. If so, then we want to get the current speed/duplex + * of the PHY. + */ + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) + return E1000_SUCCESS; /* No link detected */ + + mac->get_link_status = false; + + /* Check if there was DownShift, must be checked + * immediately after link-up + */ + e1000_check_downshift_generic(hw); + + /* If we are forcing speed/duplex, then we simply return since + * we have already determined whether we have link or not. + */ + if (!mac->autoneg) + return -E1000_ERR_CONFIG; + + /* Auto-Neg is enabled. Auto Speed Detection takes care + * of MAC speed/duplex configuration. So we only need to + * configure Collision Distance in the MAC. + */ + mac->ops.config_collision_dist(hw); + + /* Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) + DEBUGOUT("Error configuring flow control\n"); + + return ret_val; +} + +/** + * e1000_check_for_fiber_link_generic - Check for link (Fiber) + * @hw: pointer to the HW structure + * + * Checks for link up on the hardware. If link is not up and we have + * a signal, then we need to force link up. + **/ +s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 rxcw; + u32 ctrl; + u32 status; + s32 ret_val; + + DEBUGFUNC("e1000_check_for_fiber_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + status = E1000_READ_REG(hw, E1000_STATUS); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + + /* If we don't have link (auto-negotiation failed or link partner + * cannot auto-negotiate), the cable is plugged in (we have signal), + * and our link partner is not trying to auto-negotiate with us (we + * are receiving idles or data), we need to force link up. We also + * need to give auto-negotiation time to complete, in case the cable + * was just plugged in. The autoneg_failed flag does this. + */ + /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ + if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) && + !(rxcw & E1000_RXCW_C)) { + if (!mac->autoneg_failed) { + mac->autoneg_failed = true; + return E1000_SUCCESS; + } + DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); + + /* Disable auto-negotiation in the TXCW register */ + E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); + + /* Force link-up and also force full-duplex. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Configure Flow Control after forcing link up. */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { + /* If we are forcing link and we are receiving /C/ ordered + * sets, re-enable auto-negotiation in the TXCW register + * and disable forced link in the Device Control register + * in an attempt to auto-negotiate with our link partner. + */ + DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); + E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); + E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); + + mac->serdes_has_link = true; + } + + return E1000_SUCCESS; +} + +/** + * e1000_check_for_serdes_link_generic - Check for link (Serdes) + * @hw: pointer to the HW structure + * + * Checks for link up on the hardware. If link is not up and we have + * a signal, then we need to force link up. + **/ +s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 rxcw; + u32 ctrl; + u32 status; + s32 ret_val; + + DEBUGFUNC("e1000_check_for_serdes_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + status = E1000_READ_REG(hw, E1000_STATUS); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + + /* If we don't have link (auto-negotiation failed or link partner + * cannot auto-negotiate), and our link partner is not trying to + * auto-negotiate with us (we are receiving idles or data), + * we need to force link up. We also need to give auto-negotiation + * time to complete. + */ + /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ + if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { + if (!mac->autoneg_failed) { + mac->autoneg_failed = true; + return E1000_SUCCESS; + } + DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); + + /* Disable auto-negotiation in the TXCW register */ + E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); + + /* Force link-up and also force full-duplex. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Configure Flow Control after forcing link up. */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { + /* If we are forcing link and we are receiving /C/ ordered + * sets, re-enable auto-negotiation in the TXCW register + * and disable forced link in the Device Control register + * in an attempt to auto-negotiate with our link partner. + */ + DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); + E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); + E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); + + mac->serdes_has_link = true; + } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { + /* If we force link for non-auto-negotiation switch, check + * link status based on MAC synchronization for internal + * serdes media type. + */ + /* SYNCH bit and IV bit are sticky. */ + usec_delay(10); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + if (rxcw & E1000_RXCW_SYNCH) { + if (!(rxcw & E1000_RXCW_IV)) { + mac->serdes_has_link = true; + DEBUGOUT("SERDES: Link up - forced.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - force failed.\n"); + } + } + + if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) { + /* SYNCH bit and IV bit are sticky, so reread rxcw. */ + usec_delay(10); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + if (rxcw & E1000_RXCW_SYNCH) { + if (!(rxcw & E1000_RXCW_IV)) { + mac->serdes_has_link = true; + DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n"); + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - no sync.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - autoneg failed\n"); + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_set_default_fc_generic - Set flow control default values + * @hw: pointer to the HW structure + * + * Read the EEPROM for the default values for flow control and store the + * values. + **/ +static s32 e1000_set_default_fc_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 nvm_data; + + DEBUGFUNC("e1000_set_default_fc_generic"); + + /* Read and store word 0x0F of the EEPROM. This word contains bits + * that determine the hardware's default PAUSE (flow control) mode, + * a bit that determines whether the HW defaults to enabling or + * disabling auto-negotiation, and the direction of the + * SW defined pins. If there is no SW over-ride of the flow + * control setting, then the variable hw->fc will + * be initialized based on a value in the EEPROM. + */ + ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); + + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (!(nvm_data & NVM_WORD0F_PAUSE_MASK)) + hw->fc.requested_mode = e1000_fc_none; + else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == + NVM_WORD0F_ASM_DIR) + hw->fc.requested_mode = e1000_fc_tx_pause; + else + hw->fc.requested_mode = e1000_fc_full; + + return E1000_SUCCESS; +} + +/** + * e1000_setup_link_generic - Setup flow control and link settings + * @hw: pointer to the HW structure + * + * Determines which flow control settings to use, then configures flow + * control. Calls the appropriate media-specific link configuration + * function. Assuming the adapter has a valid link partner, a valid link + * should be established. Assumes the hardware has previously been reset + * and the transmitter and receiver are not enabled. + **/ +s32 e1000_setup_link_generic(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_setup_link_generic"); + + /* In the case of the phy reset being blocked, we already have a link. + * We do not need to set it up again. + */ + if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) + return E1000_SUCCESS; + + /* If requested flow control is set to default, set flow control + * based on the EEPROM flow control settings. + */ + if (hw->fc.requested_mode == e1000_fc_default) { + ret_val = e1000_set_default_fc_generic(hw); + if (ret_val) + return ret_val; + } + + /* Save off the requested flow control mode for use later. Depending + * on the link partner's capabilities, we may or may not use this mode. + */ + hw->fc.current_mode = hw->fc.requested_mode; + + DEBUGOUT1("After fix-ups FlowControl is now = %x\n", + hw->fc.current_mode); + + /* Call the necessary media_type subroutine to configure the link. */ + ret_val = hw->mac.ops.setup_physical_interface(hw); + if (ret_val) + return ret_val; + + /* Initialize the flow control address, type, and PAUSE timer + * registers to their default values. This is done even if flow + * control is disabled, because it does not hurt anything to + * initialize these registers. + */ + DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); + E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); + E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); + E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); + + E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); + + return e1000_set_fc_watermarks_generic(hw); +} + +/** + * e1000_commit_fc_settings_generic - Configure flow control + * @hw: pointer to the HW structure + * + * Write the flow control settings to the Transmit Config Word Register (TXCW) + * base on the flow control settings in e1000_mac_info. + **/ +static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 txcw; + + DEBUGFUNC("e1000_commit_fc_settings_generic"); + + /* Check for a software override of the flow control settings, and + * setup the device accordingly. If auto-negotiation is enabled, then + * software will have to set the "PAUSE" bits to the correct value in + * the Transmit Config Word Register (TXCW) and re-start auto- + * negotiation. However, if auto-negotiation is disabled, then + * software will have to manually configure the two flow control enable + * bits in the CTRL register. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but we + * do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + */ + switch (hw->fc.current_mode) { + case e1000_fc_none: + /* Flow control completely disabled by a software over-ride. */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); + break; + case e1000_fc_rx_pause: + /* Rx Flow control is enabled and Tx Flow control is disabled + * by a software over-ride. Since there really isn't a way to + * advertise that we are capable of Rx Pause ONLY, we will + * advertise that we support both symmetric and asymmetric Rx + * PAUSE. Later, we will disable the adapter's ability to send + * PAUSE frames. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + case e1000_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is disabled, + * by a software over-ride. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); + break; + case e1000_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + break; + } + + E1000_WRITE_REG(hw, E1000_TXCW, txcw); + mac->txcw = txcw; + + return E1000_SUCCESS; +} + +/** + * e1000_poll_fiber_serdes_link_generic - Poll for link up + * @hw: pointer to the HW structure + * + * Polls for link up by reading the status register, if link fails to come + * up with auto-negotiation, then the link is forced if a signal is detected. + **/ +static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 i, status; + s32 ret_val; + + DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); + + /* If we have a signal (the cable is plugged in, or assumed true for + * serdes media) then poll for a "Link-Up" indication in the Device + * Status Register. Time-out if a link isn't seen in 500 milliseconds + * seconds (Auto-negotiation should complete in less than 500 + * milliseconds even if the other end is doing it in SW). + */ + for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { + msec_delay(10); + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) + break; + } + if (i == FIBER_LINK_UP_LIMIT) { + DEBUGOUT("Never got a valid link from auto-neg!!!\n"); + mac->autoneg_failed = true; + /* AutoNeg failed to achieve a link, so we'll call + * mac->check_for_link. This routine will force the + * link up if we detect a signal. This will allow us to + * communicate with non-autonegotiating link partners. + */ + ret_val = mac->ops.check_for_link(hw); + if (ret_val) { + DEBUGOUT("Error while checking for link\n"); + return ret_val; + } + mac->autoneg_failed = false; + } else { + mac->autoneg_failed = false; + DEBUGOUT("Valid Link Found\n"); + } + + return E1000_SUCCESS; +} + +/** + * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes + * @hw: pointer to the HW structure + * + * Configures collision distance and flow control for fiber and serdes + * links. Upon successful setup, poll for link. + **/ +s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + + DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* Take the link out of reset */ + ctrl &= ~E1000_CTRL_LRST; + + hw->mac.ops.config_collision_dist(hw); + + ret_val = e1000_commit_fc_settings_generic(hw); + if (ret_val) + return ret_val; + + /* Since auto-negotiation is enabled, take the link out of reset (the + * link will be in reset, because we previously reset the chip). This + * will restart auto-negotiation. If auto-negotiation is successful + * then the link-up status bit will be set and the flow control enable + * bits (RFCE and TFCE) will be set according to their negotiated value. + */ + DEBUGOUT("Auto-negotiation enabled\n"); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + msec_delay(1); + + /* For these adapters, the SW definable pin 1 is set when the optics + * detect a signal. If we have a signal, then poll for a "Link-Up" + * indication. + */ + if (hw->phy.media_type == e1000_media_type_internal_serdes || + (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { + ret_val = e1000_poll_fiber_serdes_link_generic(hw); + } else { + DEBUGOUT("No signal detected\n"); + } + + return ret_val; +} + +/** + * e1000_config_collision_dist_generic - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +static void e1000_config_collision_dist_generic(struct e1000_hw *hw) +{ + u32 tctl; + + DEBUGFUNC("e1000_config_collision_dist_generic"); + + tctl = E1000_READ_REG(hw, E1000_TCTL); + + tctl &= ~E1000_TCTL_COLD; + tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; + + E1000_WRITE_REG(hw, E1000_TCTL, tctl); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks + * @hw: pointer to the HW structure + * + * Sets the flow control high/low threshold (watermark) registers. If + * flow control XON frame transmission is enabled, then set XON frame + * transmission as well. + **/ +s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) +{ + u32 fcrtl = 0, fcrth = 0; + + DEBUGFUNC("e1000_set_fc_watermarks_generic"); + + /* Set the flow control receive threshold registers. Normally, + * these registers will be set to a default threshold that may be + * adjusted later by the driver's runtime code. However, if the + * ability to transmit pause frames is not enabled, then these + * registers will be set to 0. + */ + if (hw->fc.current_mode & e1000_fc_tx_pause) { + /* We need to set up the Receive Threshold high and low water + * marks as well as (optionally) enabling the transmission of + * XON frames. + */ + fcrtl = hw->fc.low_water; + if (hw->fc.send_xon) + fcrtl |= E1000_FCRTL_XONE; + + fcrth = hw->fc.high_water; + } + E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); + E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); + + return E1000_SUCCESS; +} + +/** + * e1000_force_mac_fc_generic - Force the MAC's flow control settings + * @hw: pointer to the HW structure + * + * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the + * device control register to reflect the adapter settings. TFCE and RFCE + * need to be explicitly set by software when a copper PHY is used because + * autonegotiation is managed by the PHY rather than the MAC. Software must + * also configure these bits when link is forced on a fiber connection. + **/ +s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_force_mac_fc_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* Because we didn't get link via the internal auto-negotiation + * mechanism (we either forced link or we got link via PHY + * auto-neg), we have to manually enable/disable transmit an + * receive flow control. + * + * The "Case" statement below enables/disable flow control + * according to the "hw->fc.current_mode" parameter. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause + * frames but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * frames but we do not receive pause frames). + * 3: Both Rx and Tx flow control (symmetric) is enabled. + * other: No other values should be possible at this point. + */ + DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode); + + switch (hw->fc.current_mode) { + case e1000_fc_none: + ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); + break; + case e1000_fc_rx_pause: + ctrl &= (~E1000_CTRL_TFCE); + ctrl |= E1000_CTRL_RFCE; + break; + case e1000_fc_tx_pause: + ctrl &= (~E1000_CTRL_RFCE); + ctrl |= E1000_CTRL_TFCE; + break; + case e1000_fc_full: + ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + return E1000_SUCCESS; +} + +/** + * e1000_config_fc_after_link_up_generic - Configures flow control after link + * @hw: pointer to the HW structure + * + * Checks the status of auto-negotiation after link up to ensure that the + * speed and duplex were not forced. If the link needed to be forced, then + * flow control needs to be forced also. If auto-negotiation is enabled + * and did not fail, then we configure flow control based on our link + * partner. + **/ +s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val = E1000_SUCCESS; + u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; + u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; + u16 speed, duplex; + + DEBUGFUNC("e1000_config_fc_after_link_up_generic"); + + /* Check for the case where we have fiber media and auto-neg failed + * so we had to force link. In this case, we need to force the + * configuration of the MAC to match the "fc" parameter. + */ + if (mac->autoneg_failed) { + if (hw->phy.media_type == e1000_media_type_fiber || + hw->phy.media_type == e1000_media_type_internal_serdes) + ret_val = e1000_force_mac_fc_generic(hw); + } else { + if (hw->phy.media_type == e1000_media_type_copper) + ret_val = e1000_force_mac_fc_generic(hw); + } + + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + + /* Check for the case where we have copper media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); + if (ret_val) + return ret_val; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); + if (ret_val) + return ret_val; + + if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { + DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); + return ret_val; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (Address 4) and the Auto_Negotiation Base + * Page Ability Register (Address 5) to determine how + * flow control was negotiated. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, + &mii_nway_adv_reg); + if (ret_val) + return ret_val; + ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, + &mii_nway_lp_ability_reg); + if (ret_val) + return ret_val; + + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | e1000_fc_none + * 0 | 1 | 0 | DC | e1000_fc_none + * 0 | 1 | 1 | 0 | e1000_fc_none + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * 1 | 0 | 0 | DC | e1000_fc_none + * 1 | DC | 1 | DC | e1000_fc_full + * 1 | 1 | 0 | 0 | e1000_fc_none + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | E1000_fc_full + * + */ + if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise Rx + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == e1000_fc_full) { + hw->fc.current_mode = e1000_fc_full; + DEBUGOUT("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } + } + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + */ + else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_tx_pause; + DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + */ + else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } else { + /* Per the IEEE spec, at this point flow control + * should be disabled. + */ + hw->fc.current_mode = e1000_fc_none; + DEBUGOUT("Flow Control = NONE.\n"); + } + + /* Now we need to do one last check... If we auto- + * negotiated to HALF DUPLEX, flow control should not be + * enabled per IEEE 802.3 spec. + */ + ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); + if (ret_val) { + DEBUGOUT("Error getting link speed and duplex\n"); + return ret_val; + } + + if (duplex == HALF_DUPLEX) + hw->fc.current_mode = e1000_fc_none; + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + ret_val = e1000_force_mac_fc_generic(hw); + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } + + /* Check for the case where we have SerDes media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if ((hw->phy.media_type == e1000_media_type_internal_serdes) && + mac->autoneg) { + /* Read the PCS_LSTS and check to see if AutoNeg + * has completed. + */ + pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT); + + if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { + DEBUGOUT("PCS Auto Neg has not completed.\n"); + return ret_val; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (PCS_ANADV) and the Auto_Negotiation Base + * Page Ability Register (PCS_LPAB) to determine how + * flow control was negotiated. + */ + pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); + pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB); + + /* Two bits in the Auto Negotiation Advertisement Register + * (PCS_ANADV) and two bits in the Auto Negotiation Base + * Page Ability Register (PCS_LPAB) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | e1000_fc_none + * 0 | 1 | 0 | DC | e1000_fc_none + * 0 | 1 | 1 | 0 | e1000_fc_none + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * 1 | 0 | 0 | DC | e1000_fc_none + * 1 | DC | 1 | DC | e1000_fc_full + * 1 | 1 | 0 | 0 | e1000_fc_none + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | e1000_fc_full + * + */ + if ((pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { + /* Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise Rx + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == e1000_fc_full) { + hw->fc.current_mode = e1000_fc_full; + DEBUGOUT("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } + } + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + */ + else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_adv_reg & E1000_TXCW_ASM_DIR) && + (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_tx_pause; + DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + */ + else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_adv_reg & E1000_TXCW_ASM_DIR) && + !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } else { + /* Per the IEEE spec, at this point flow control + * should be disabled. + */ + hw->fc.current_mode = e1000_fc_none; + DEBUGOUT("Flow Control = NONE.\n"); + } + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; + E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg); + + ret_val = e1000_force_mac_fc_generic(hw); + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Read the status register for the current speed/duplex and store the current + * speed and duplex for copper connections. + **/ +s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, + u16 *duplex) +{ + u32 status; + + DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); + + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_SPEED_1000) { + *speed = SPEED_1000; + DEBUGOUT("1000 Mbs, "); + } else if (status & E1000_STATUS_SPEED_100) { + *speed = SPEED_100; + DEBUGOUT("100 Mbs, "); + } else { + *speed = SPEED_10; + DEBUGOUT("10 Mbs, "); + } + + if (status & E1000_STATUS_FD) { + *duplex = FULL_DUPLEX; + DEBUGOUT("Full Duplex\n"); + } else { + *duplex = HALF_DUPLEX; + DEBUGOUT("Half Duplex\n"); + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Sets the speed and duplex to gigabit full duplex (the only possible option) + * for fiber/serdes links. + **/ +s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw, + u16 *speed, u16 *duplex) +{ + DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); + + *speed = SPEED_1000; + *duplex = FULL_DUPLEX; + + return E1000_SUCCESS; +} + +/** + * e1000_get_hw_semaphore_generic - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_generic"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore_generic - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore_generic"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + +/** + * e1000_get_auto_rd_done_generic - Check for auto read completion + * @hw: pointer to the HW structure + * + * Check EEPROM for Auto Read done bit. + **/ +s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) +{ + s32 i = 0; + + DEBUGFUNC("e1000_get_auto_rd_done_generic"); + + while (i < AUTO_READ_DONE_TIMEOUT) { + if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) + break; + msec_delay(1); + i++; + } + + if (i == AUTO_READ_DONE_TIMEOUT) { + DEBUGOUT("Auto read by HW from NVM has not completed.\n"); + return -E1000_ERR_RESET; + } + + return E1000_SUCCESS; +} + +/** + * e1000_valid_led_default_generic - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_generic"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) + *data = ID_LED_DEFAULT; + + return E1000_SUCCESS; +} + +/** + * e1000_id_led_init_generic - + * @hw: pointer to the HW structure + * + **/ +s32 e1000_id_led_init_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + const u32 ledctl_mask = 0x000000FF; + const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; + const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; + u16 data, i, temp; + const u16 led_mask = 0x0F; + + DEBUGFUNC("e1000_id_led_init_generic"); + + ret_val = hw->nvm.ops.valid_led_default(hw, &data); + if (ret_val) + return ret_val; + + mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); + mac->ledctl_mode1 = mac->ledctl_default; + mac->ledctl_mode2 = mac->ledctl_default; + + for (i = 0; i < 4; i++) { + temp = (data >> (i << 2)) & led_mask; + switch (temp) { + case ID_LED_ON1_DEF2: + case ID_LED_ON1_ON2: + case ID_LED_ON1_OFF2: + mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode1 |= ledctl_on << (i << 3); + break; + case ID_LED_OFF1_DEF2: + case ID_LED_OFF1_ON2: + case ID_LED_OFF1_OFF2: + mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode1 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing */ + break; + } + switch (temp) { + case ID_LED_DEF1_ON2: + case ID_LED_ON1_ON2: + case ID_LED_OFF1_ON2: + mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode2 |= ledctl_on << (i << 3); + break; + case ID_LED_DEF1_OFF2: + case ID_LED_ON1_OFF2: + case ID_LED_OFF1_OFF2: + mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode2 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing */ + break; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_setup_led_generic - Configures SW controllable LED + * @hw: pointer to the HW structure + * + * This prepares the SW controllable LED for use and saves the current state + * of the LED so it can be later restored. + **/ +s32 e1000_setup_led_generic(struct e1000_hw *hw) +{ + u32 ledctl; + + DEBUGFUNC("e1000_setup_led_generic"); + + if (hw->mac.ops.setup_led != e1000_setup_led_generic) + return -E1000_ERR_CONFIG; + + if (hw->phy.media_type == e1000_media_type_fiber) { + ledctl = E1000_READ_REG(hw, E1000_LEDCTL); + hw->mac.ledctl_default = ledctl; + /* Turn off LED0 */ + ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_LED0_MODE_MASK); + ledctl |= (E1000_LEDCTL_MODE_LED_OFF << + E1000_LEDCTL_LED0_MODE_SHIFT); + E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); + } else if (hw->phy.media_type == e1000_media_type_copper) { + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); + } + + return E1000_SUCCESS; +} + +/** + * e1000_cleanup_led_generic - Set LED config to default operation + * @hw: pointer to the HW structure + * + * Remove the current LED configuration and set the LED configuration + * to the default value, saved from the EEPROM. + **/ +s32 e1000_cleanup_led_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_cleanup_led_generic"); + + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); + return E1000_SUCCESS; +} + +/** + * e1000_blink_led_generic - Blink LED + * @hw: pointer to the HW structure + * + * Blink the LEDs which are set to be on. + **/ +s32 e1000_blink_led_generic(struct e1000_hw *hw) +{ + u32 ledctl_blink = 0; + u32 i; + + DEBUGFUNC("e1000_blink_led_generic"); + + if (hw->phy.media_type == e1000_media_type_fiber) { + /* always blink LED0 for PCI-E fiber */ + ledctl_blink = E1000_LEDCTL_LED0_BLINK | + (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); + } else { + /* Set the blink bit for each LED that's "on" (0x0E) + * (or "off" if inverted) in ledctl_mode2. The blink + * logic in hardware only works when mode is set to "on" + * so it must be changed accordingly when the mode is + * "off" and inverted. + */ + ledctl_blink = hw->mac.ledctl_mode2; + for (i = 0; i < 32; i += 8) { + u32 mode = (hw->mac.ledctl_mode2 >> i) & + E1000_LEDCTL_LED0_MODE_MASK; + u32 led_default = hw->mac.ledctl_default >> i; + + if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_ON)) || + ((led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_OFF))) { + ledctl_blink &= + ~(E1000_LEDCTL_LED0_MODE_MASK << i); + ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_MODE_LED_ON) << i; + } + } + } + + E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); + + return E1000_SUCCESS; +} + +/** + * e1000_led_on_generic - Turn LED on + * @hw: pointer to the HW structure + * + * Turn LED on. + **/ +s32 e1000_led_on_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_led_on_generic"); + + switch (hw->phy.media_type) { + case e1000_media_type_fiber: + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl &= ~E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + break; + case e1000_media_type_copper: + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_led_off_generic - Turn LED off + * @hw: pointer to the HW structure + * + * Turn LED off. + **/ +s32 e1000_led_off_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_led_off_generic"); + + switch (hw->phy.media_type) { + case e1000_media_type_fiber: + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + break; + case e1000_media_type_copper: + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities + * @hw: pointer to the HW structure + * @no_snoop: bitmap of snoop events + * + * Set the PCI-express register to snoop for events enabled in 'no_snoop'. + **/ +void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) +{ + u32 gcr; + + DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); + + if (no_snoop) { + gcr = E1000_READ_REG(hw, E1000_GCR); + gcr &= ~(PCIE_NO_SNOOP_ALL); + gcr |= no_snoop; + E1000_WRITE_REG(hw, E1000_GCR, gcr); + } +} + +/** + * e1000_disable_pcie_master_generic - Disables PCI-express master access + * @hw: pointer to the HW structure + * + * Returns E1000_SUCCESS if successful, else returns -10 + * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused + * the master requests to be disabled. + * + * Disables PCI-Express master access and verifies there are no pending + * requests. + **/ +s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) +{ + u32 ctrl; + s32 timeout = MASTER_DISABLE_TIMEOUT; + + DEBUGFUNC("e1000_disable_pcie_master_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + while (timeout) { + if (!(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_GIO_MASTER_ENABLE)) + break; + usec_delay(100); + timeout--; + } + + if (!timeout) { + DEBUGOUT("Master requests are pending.\n"); + return -E1000_ERR_MASTER_REQUESTS_PENDING; + } + + return E1000_SUCCESS; +} + +/** + * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing + * @hw: pointer to the HW structure + * + * Reset the Adaptive Interframe Spacing throttle to default values. + **/ +void e1000_reset_adaptive_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + + DEBUGFUNC("e1000_reset_adaptive_generic"); + + if (!mac->adaptive_ifs) { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + return; + } + + mac->current_ifs_val = 0; + mac->ifs_min_val = IFS_MIN; + mac->ifs_max_val = IFS_MAX; + mac->ifs_step_size = IFS_STEP; + mac->ifs_ratio = IFS_RATIO; + + mac->in_ifs_mode = false; + E1000_WRITE_REG(hw, E1000_AIT, 0); +} + +/** + * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing + * @hw: pointer to the HW structure + * + * Update the Adaptive Interframe Spacing Throttle value based on the + * time between transmitted packets and time between collisions. + **/ +void e1000_update_adaptive_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + + DEBUGFUNC("e1000_update_adaptive_generic"); + + if (!mac->adaptive_ifs) { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + return; + } + + if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { + if (mac->tx_packet_delta > MIN_NUM_XMITS) { + mac->in_ifs_mode = true; + if (mac->current_ifs_val < mac->ifs_max_val) { + if (!mac->current_ifs_val) + mac->current_ifs_val = mac->ifs_min_val; + else + mac->current_ifs_val += + mac->ifs_step_size; + E1000_WRITE_REG(hw, E1000_AIT, + mac->current_ifs_val); + } + } + } else { + if (mac->in_ifs_mode && + (mac->tx_packet_delta <= MIN_NUM_XMITS)) { + mac->current_ifs_val = 0; + mac->in_ifs_mode = false; + E1000_WRITE_REG(hw, E1000_AIT, 0); + } + } +} + +/** + * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings + * @hw: pointer to the HW structure + * + * Verify that when not using auto-negotiation that MDI/MDIx is correctly + * set, which is forced to MDI mode only. + **/ +static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_validate_mdi_setting_generic"); + + if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { + DEBUGOUT("Invalid MDI setting detected\n"); + hw->phy.mdix = 1; + return -E1000_ERR_CONFIG; + } + + return E1000_SUCCESS; +} + +/** + * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings + * @hw: pointer to the HW structure + * + * Validate the MDI/MDIx setting, allowing for auto-crossover during forced + * operation. + **/ +s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic"); + + return E1000_SUCCESS; +} + +/** + * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register + * @hw: pointer to the HW structure + * @reg: 32bit register offset such as E1000_SCTL + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes an address/data control type register. There are several of these + * and they all have the format address << 8 | data and bit 31 is polled for + * completion. + **/ +s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, + u32 offset, u8 data) +{ + u32 i, regvalue = 0; + + DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); + + /* Set up the address and data */ + regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); + E1000_WRITE_REG(hw, reg, regvalue); + + /* Poll the ready bit to see if the MDI read completed */ + for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { + usec_delay(5); + regvalue = E1000_READ_REG(hw, reg); + if (regvalue & E1000_GEN_CTL_READY) + break; + } + if (!(regvalue & E1000_GEN_CTL_READY)) { + DEBUGOUT1("Reg %08x did not indicate ready\n", reg); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h new file mode 100644 index 00000000..6a1b0f52 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h @@ -0,0 +1,80 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MAC_H_ +#define _E1000_MAC_H_ + +void e1000_init_mac_ops_generic(struct e1000_hw *hw); +void e1000_null_mac_generic(struct e1000_hw *hw); +s32 e1000_null_ops_generic(struct e1000_hw *hw); +s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); +bool e1000_null_mng_mode(struct e1000_hw *hw); +void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); +void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); +void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); +s32 e1000_blink_led_generic(struct e1000_hw *hw); +s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); +s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); +s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); +s32 e1000_cleanup_led_generic(struct e1000_hw *hw); +s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); +s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); +s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); +s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); +s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); +void e1000_set_lan_id_single_port(struct e1000_hw *hw); +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); +s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, + u16 *duplex); +s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, + u16 *speed, u16 *duplex); +s32 e1000_id_led_init_generic(struct e1000_hw *hw); +s32 e1000_led_on_generic(struct e1000_hw *hw); +s32 e1000_led_off_generic(struct e1000_hw *hw); +void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count); +s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); +s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); +s32 e1000_setup_led_generic(struct e1000_hw *hw); +s32 e1000_setup_link_generic(struct e1000_hw *hw); +s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw); +s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, + u32 offset, u8 data); + +u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); + +void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); +void e1000_clear_vfta_generic(struct e1000_hw *hw); +void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); +s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); +void e1000_reset_adaptive_generic(struct e1000_hw *hw); +void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); +void e1000_update_adaptive_generic(struct e1000_hw *hw); +void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c new file mode 100644 index 00000000..a1700398 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c @@ -0,0 +1,554 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +/** + * e1000_calculate_checksum - Calculate checksum for buffer + * @buffer: pointer to EEPROM + * @length: size of EEPROM to calculate a checksum for + * + * Calculates the checksum for some buffer on a specified length. The + * checksum calculated is returned. + **/ +u8 e1000_calculate_checksum(u8 *buffer, u32 length) +{ + u32 i; + u8 sum = 0; + + DEBUGFUNC("e1000_calculate_checksum"); + + if (!buffer) + return 0; + + for (i = 0; i < length; i++) + sum += buffer[i]; + + return (u8) (0 - sum); +} + +/** + * e1000_mng_enable_host_if_generic - Checks host interface is enabled + * @hw: pointer to the HW structure + * + * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND + * + * This function checks whether the HOST IF is enabled for command operation + * and also checks whether the previous command is completed. It busy waits + * in case of previous command is not completed. + **/ +s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw) +{ + u32 hicr; + u8 i; + + DEBUGFUNC("e1000_mng_enable_host_if_generic"); + + if (!hw->mac.arc_subsystem_valid) { + DEBUGOUT("ARC subsystem not valid.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + /* check the previous command is completed */ + for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay_irq(1); + } + + if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { + DEBUGOUT("Previous command timeout failed .\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + return E1000_SUCCESS; +} + +/** + * e1000_check_mng_mode_generic - Generic check management mode + * @hw: pointer to the HW structure + * + * Reads the firmware semaphore register and returns true (>0) if + * manageability is enabled, else false (0). + **/ +bool e1000_check_mng_mode_generic(struct e1000_hw *hw) +{ + u32 fwsm = E1000_READ_REG(hw, E1000_FWSM); + + DEBUGFUNC("e1000_check_mng_mode_generic"); + + + return (fwsm & E1000_FWSM_MODE_MASK) == + (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT); +} + +/** + * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx + * @hw: pointer to the HW structure + * + * Enables packet filtering on transmit packets if manageability is enabled + * and host interface is enabled. + **/ +bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw) +{ + struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; + u32 *buffer = (u32 *)&hw->mng_cookie; + u32 offset; + s32 ret_val, hdr_csum, csum; + u8 i, len; + + DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic"); + + hw->mac.tx_pkt_filtering = true; + + /* No manageability, no filtering */ + if (!hw->mac.ops.check_mng_mode(hw)) { + hw->mac.tx_pkt_filtering = false; + return hw->mac.tx_pkt_filtering; + } + + /* If we can't read from the host interface for whatever + * reason, disable filtering. + */ + ret_val = e1000_mng_enable_host_if_generic(hw); + if (ret_val != E1000_SUCCESS) { + hw->mac.tx_pkt_filtering = false; + return hw->mac.tx_pkt_filtering; + } + + /* Read in the header. Length and offset are in dwords. */ + len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; + offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; + for (i = 0; i < len; i++) + *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, + offset + i); + hdr_csum = hdr->checksum; + hdr->checksum = 0; + csum = e1000_calculate_checksum((u8 *)hdr, + E1000_MNG_DHCP_COOKIE_LENGTH); + /* If either the checksums or signature don't match, then + * the cookie area isn't considered valid, in which case we + * take the safe route of assuming Tx filtering is enabled. + */ + if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) { + hw->mac.tx_pkt_filtering = true; + return hw->mac.tx_pkt_filtering; + } + + /* Cookie area is valid, make the final check for filtering. */ + if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) + hw->mac.tx_pkt_filtering = false; + + return hw->mac.tx_pkt_filtering; +} + +/** + * e1000_mng_write_cmd_header_generic - Writes manageability command header + * @hw: pointer to the HW structure + * @hdr: pointer to the host interface command header + * + * Writes the command header after does the checksum calculation. + **/ +s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr) +{ + u16 i, length = sizeof(struct e1000_host_mng_command_header); + + DEBUGFUNC("e1000_mng_write_cmd_header_generic"); + + /* Write the whole command header structure with new checksum. */ + + hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); + + length >>= 2; + /* Write the relevant command block into the ram area. */ + for (i = 0; i < length; i++) { + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, + *((u32 *) hdr + i)); + E1000_WRITE_FLUSH(hw); + } + + return E1000_SUCCESS; +} + +/** + * e1000_mng_host_if_write_generic - Write to the manageability host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface buffer + * @length: size of the buffer + * @offset: location in the buffer to write to + * @sum: sum of the data (not checksum) + * + * This function writes the buffer content at the offset given on the host if. + * It also does alignment considerations to do the writes in most efficient + * way. Also fills up the sum of the buffer in *buffer parameter. + **/ +s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, + u16 length, u16 offset, u8 *sum) +{ + u8 *tmp; + u8 *bufptr = buffer; + u32 data = 0; + u16 remaining, i, j, prev_bytes; + + DEBUGFUNC("e1000_mng_host_if_write_generic"); + + /* sum = only sum of the data and it is not checksum */ + + if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) + return -E1000_ERR_PARAM; + + tmp = (u8 *)&data; + prev_bytes = offset & 0x3; + offset >>= 2; + + if (prev_bytes) { + data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset); + for (j = prev_bytes; j < sizeof(u32); j++) { + *(tmp + j) = *bufptr++; + *sum += *(tmp + j); + } + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data); + length -= j - prev_bytes; + offset++; + } + + remaining = length & 0x3; + length -= remaining; + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant command block into the + * ram area. + */ + for (i = 0; i < length; i++) { + for (j = 0; j < sizeof(u32); j++) { + *(tmp + j) = *bufptr++; + *sum += *(tmp + j); + } + + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, + data); + } + if (remaining) { + for (j = 0; j < sizeof(u32); j++) { + if (j < remaining) + *(tmp + j) = *bufptr++; + else + *(tmp + j) = 0; + + *sum += *(tmp + j); + } + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, + data); + } + + return E1000_SUCCESS; +} + +/** + * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface + * @length: size of the buffer + * + * Writes the DHCP information to the host interface. + **/ +s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer, + u16 length) +{ + struct e1000_host_mng_command_header hdr; + s32 ret_val; + u32 hicr; + + DEBUGFUNC("e1000_mng_write_dhcp_info_generic"); + + hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; + hdr.command_length = length; + hdr.reserved1 = 0; + hdr.reserved2 = 0; + hdr.checksum = 0; + + /* Enable the host interface */ + ret_val = e1000_mng_enable_host_if_generic(hw); + if (ret_val) + return ret_val; + + /* Populate the host interface with the contents of "buffer". */ + ret_val = e1000_mng_host_if_write_generic(hw, buffer, length, + sizeof(hdr), &(hdr.checksum)); + if (ret_val) + return ret_val; + + /* Write the manageability command header */ + ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr); + if (ret_val) + return ret_val; + + /* Tell the ARC a new command is pending. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + return E1000_SUCCESS; +} + +/** + * e1000_enable_mng_pass_thru - Check if management passthrough is needed + * @hw: pointer to the HW structure + * + * Verifies the hardware needs to leave interface enabled so that frames can + * be directed to and from the management interface. + **/ +bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) +{ + u32 manc; + u32 fwsm, factps; + + DEBUGFUNC("e1000_enable_mng_pass_thru"); + + if (!hw->mac.asf_firmware_present) + return false; + + manc = E1000_READ_REG(hw, E1000_MANC); + + if (!(manc & E1000_MANC_RCV_TCO_EN)) + return false; + + if (hw->mac.has_fwsm) { + fwsm = E1000_READ_REG(hw, E1000_FWSM); + factps = E1000_READ_REG(hw, E1000_FACTPS); + + if (!(factps & E1000_FACTPS_MNGCG) && + ((fwsm & E1000_FWSM_MODE_MASK) == + (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) + return true; + } else if ((manc & E1000_MANC_SMBUS_EN) && + !(manc & E1000_MANC_ASF_EN)) { + return true; + } + + return false; +} + +/** + * e1000_host_interface_command - Writes buffer to host interface + * @hw: pointer to the HW structure + * @buffer: contains a command to write + * @length: the byte length of the buffer, must be multiple of 4 bytes + * + * Writes a buffer to the Host Interface. Upon success, returns E1000_SUCCESS + * else returns E1000_ERR_HOST_INTERFACE_COMMAND. + **/ +s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length) +{ + u32 hicr, i; + + DEBUGFUNC("e1000_host_interface_command"); + + if (!(hw->mac.arc_subsystem_valid)) { + DEBUGOUT("Hardware doesn't support host interface command.\n"); + return E1000_SUCCESS; + } + + if (!hw->mac.asf_firmware_present) { + DEBUGOUT("Firmware is not present.\n"); + return E1000_SUCCESS; + } + + if (length == 0 || length & 0x3 || + length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) { + DEBUGOUT("Buffer length failure.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant command block + * into the ram area. + */ + for (i = 0; i < length; i++) + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, + *((u32 *)buffer + i)); + + /* Setting this bit tells the ARC that a new command is pending. */ + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay(1); + } + + /* Check command successful completion. */ + if (i == E1000_HI_COMMAND_TIMEOUT || + (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) { + DEBUGOUT("Command has failed with no status valid.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + for (i = 0; i < length; i++) + *((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, + E1000_HOST_IF, + i); + + return E1000_SUCCESS; +} +/** + * e1000_load_firmware - Writes proxy FW code buffer to host interface + * and execute. + * @hw: pointer to the HW structure + * @buffer: contains a firmware to write + * @length: the byte length of the buffer, must be multiple of 4 bytes + * + * Upon success returns E1000_SUCCESS, returns E1000_ERR_CONFIG if not enabled + * in HW else returns E1000_ERR_HOST_INTERFACE_COMMAND. + **/ +s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length) +{ + u32 hicr, hibba, fwsm, icr, i; + + DEBUGFUNC("e1000_load_firmware"); + + if (hw->mac.type < e1000_i210) { + DEBUGOUT("Hardware doesn't support loading FW by the driver\n"); + return -E1000_ERR_CONFIG; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_CONFIG; + } + if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) { + DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n"); + return -E1000_ERR_CONFIG; + } + + if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) { + DEBUGOUT("Buffer length failure.\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + /* Clear notification from ROM-FW by reading ICR register */ + icr = E1000_READ_REG(hw, E1000_ICR_V2); + + /* Reset ROM-FW */ + hicr = E1000_READ_REG(hw, E1000_HICR); + hicr |= E1000_HICR_FW_RESET_ENABLE; + E1000_WRITE_REG(hw, E1000_HICR, hicr); + hicr |= E1000_HICR_FW_RESET; + E1000_WRITE_REG(hw, E1000_HICR, hicr); + E1000_WRITE_FLUSH(hw); + + /* Wait till MAC notifies about its readiness after ROM-FW reset */ + for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) { + icr = E1000_READ_REG(hw, E1000_ICR_V2); + if (icr & E1000_ICR_MNG) + break; + msec_delay(1); + } + + /* Check for timeout */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("FW reset failed.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Wait till MAC is ready to accept new FW code */ + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + fwsm = E1000_READ_REG(hw, E1000_FWSM); + if ((fwsm & E1000_FWSM_FW_VALID) && + ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT == + E1000_FWSM_HI_EN_ONLY_MODE)) + break; + msec_delay(1); + } + + /* Check for timeout */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("FW reset failed.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant FW code block + * into the ram area in DWORDs via 1kB ram addressing window. + */ + for (i = 0; i < length; i++) { + if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) { + /* Point to correct 1kB ram window */ + hibba = E1000_HI_FW_BASE_ADDRESS + + ((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) * + (i / E1000_HI_FW_BLOCK_DWORD_LENGTH)); + + E1000_WRITE_REG(hw, E1000_HIBBA, hibba); + } + + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, + i % E1000_HI_FW_BLOCK_DWORD_LENGTH, + *((u32 *)buffer + i)); + } + + /* Setting this bit tells the ARC that a new FW is ready to execute. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay(1); + } + + /* Check for successful FW start. */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("New FW did not start within timeout period.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h new file mode 100644 index 00000000..c94b2185 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h @@ -0,0 +1,89 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MANAGE_H_ +#define _E1000_MANAGE_H_ + +bool e1000_check_mng_mode_generic(struct e1000_hw *hw); +bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw); +s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw); +s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, + u16 length, u16 offset, u8 *sum); +s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr); +s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, + u8 *buffer, u16 length); +bool e1000_enable_mng_pass_thru(struct e1000_hw *hw); +u8 e1000_calculate_checksum(u8 *buffer, u32 length); +s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length); +s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length); + +enum e1000_mng_mode { + e1000_mng_mode_none = 0, + e1000_mng_mode_asf, + e1000_mng_mode_pt, + e1000_mng_mode_ipmi, + e1000_mng_mode_host_if_only +}; + +#define E1000_FACTPS_MNGCG 0x20000000 + +#define E1000_FWSM_MODE_MASK 0xE +#define E1000_FWSM_MODE_SHIFT 1 +#define E1000_FWSM_FW_VALID 0x00008000 +#define E1000_FWSM_HI_EN_ONLY_MODE 0x4 + +#define E1000_MNG_IAMT_MODE 0x3 +#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 +#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 +#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 +#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 +#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 +#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 + +#define E1000_VFTA_ENTRY_SHIFT 5 +#define E1000_VFTA_ENTRY_MASK 0x7F +#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F + +#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ +#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ +#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI cmd limit */ +#define E1000_HI_FW_BASE_ADDRESS 0x10000 +#define E1000_HI_FW_MAX_LENGTH (64 * 1024) /* Num of bytes */ +#define E1000_HI_FW_BLOCK_DWORD_LENGTH 256 /* Num of DWORDs per page */ +#define E1000_HICR_MEMORY_BASE_EN 0x200 /* MB Enable bit - RO */ +#define E1000_HICR_EN 0x01 /* Enable bit - RO */ +/* Driver sets this bit when done to put command in RAM */ +#define E1000_HICR_C 0x02 +#define E1000_HICR_SV 0x04 /* Status Validity */ +#define E1000_HICR_FW_RESET_ENABLE 0x40 +#define E1000_HICR_FW_RESET 0x80 + +/* Intel(R) Active Management Technology signature */ +#define E1000_IAMT_SIGNATURE 0x544D4149 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c new file mode 100644 index 00000000..3ef0d98b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c @@ -0,0 +1,525 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_mbx.h" + +/** + * e1000_null_mbx_check_for_flag - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG mbx_id) +{ + DEBUGFUNC("e1000_null_mbx_check_flag"); + + return E1000_SUCCESS; +} + +/** + * e1000_null_mbx_transact - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG *msg, + u16 E1000_UNUSEDARG size, + u16 E1000_UNUSEDARG mbx_id) +{ + DEBUGFUNC("e1000_null_mbx_rw_msg"); + + return E1000_SUCCESS; +} + +/** + * e1000_read_mbx - Reads a message from the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to read + * + * returns SUCCESS if it successfully read message from buffer + **/ +s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_read_mbx"); + + /* limit read to size of mailbox */ + if (size > mbx->size) + size = mbx->size; + + if (mbx->ops.read) + ret_val = mbx->ops.read(hw, msg, size, mbx_id); + + return ret_val; +} + +/** + * e1000_write_mbx - Write a message to the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully copied message into the buffer + **/ +s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_mbx"); + + if (size > mbx->size) + ret_val = -E1000_ERR_MBX; + + else if (mbx->ops.write) + ret_val = mbx->ops.write(hw, msg, size, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_msg - checks to see if someone sent us mail + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_msg"); + + if (mbx->ops.check_for_msg) + ret_val = mbx->ops.check_for_msg(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_ack - checks to see if someone sent us ACK + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_ack"); + + if (mbx->ops.check_for_ack) + ret_val = mbx->ops.check_for_ack(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_rst - checks to see if other side has reset + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_rst"); + + if (mbx->ops.check_for_rst) + ret_val = mbx->ops.check_for_rst(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_poll_for_msg - Wait for message notification + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message notification + **/ +static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + int countdown = mbx->timeout; + + DEBUGFUNC("e1000_poll_for_msg"); + + if (!countdown || !mbx->ops.check_for_msg) + goto out; + + while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) { + countdown--; + if (!countdown) + break; + usec_delay(mbx->usec_delay); + } + + /* if we failed, all future posted messages fail until reset */ + if (!countdown) + mbx->timeout = 0; +out: + return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; +} + +/** + * e1000_poll_for_ack - Wait for message acknowledgement + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message acknowledgement + **/ +static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + int countdown = mbx->timeout; + + DEBUGFUNC("e1000_poll_for_ack"); + + if (!countdown || !mbx->ops.check_for_ack) + goto out; + + while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) { + countdown--; + if (!countdown) + break; + usec_delay(mbx->usec_delay); + } + + /* if we failed, all future posted messages fail until reset */ + if (!countdown) + mbx->timeout = 0; +out: + return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; +} + +/** + * e1000_read_posted_mbx - Wait for message notification and receive message + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message notification and + * copied it into the receive buffer. + **/ +s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_read_posted_mbx"); + + if (!mbx->ops.read) + goto out; + + ret_val = e1000_poll_for_msg(hw, mbx_id); + + /* if ack received read message, otherwise we timed out */ + if (!ret_val) + ret_val = mbx->ops.read(hw, msg, size, mbx_id); +out: + return ret_val; +} + +/** + * e1000_write_posted_mbx - Write a message to the mailbox, wait for ack + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully copied message into the buffer and + * received an ack to that message within delay * timeout period + **/ +s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_write_posted_mbx"); + + /* exit if either we can't write or there isn't a defined timeout */ + if (!mbx->ops.write || !mbx->timeout) + goto out; + + /* send msg */ + ret_val = mbx->ops.write(hw, msg, size, mbx_id); + + /* if msg sent wait until we receive an ack */ + if (!ret_val) + ret_val = e1000_poll_for_ack(hw, mbx_id); +out: + return ret_val; +} + +/** + * e1000_init_mbx_ops_generic - Initialize mbx function pointers + * @hw: pointer to the HW structure + * + * Sets the function pointers to no-op functions + **/ +void e1000_init_mbx_ops_generic(struct e1000_hw *hw) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + mbx->ops.init_params = e1000_null_ops_generic; + mbx->ops.read = e1000_null_mbx_transact; + mbx->ops.write = e1000_null_mbx_transact; + mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag; + mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag; + mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag; + mbx->ops.read_posted = e1000_read_posted_mbx; + mbx->ops.write_posted = e1000_write_posted_mbx; +} + +static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask) +{ + u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR); + s32 ret_val = -E1000_ERR_MBX; + + if (mbvficr & mask) { + ret_val = E1000_SUCCESS; + E1000_WRITE_REG(hw, E1000_MBVFICR, mask); + } + + return ret_val; +} + +/** + * e1000_check_for_msg_pf - checks to see if the VF has sent mail + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_msg_pf"); + + if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) { + ret_val = E1000_SUCCESS; + hw->mbx.stats.reqs++; + } + + return ret_val; +} + +/** + * e1000_check_for_ack_pf - checks to see if the VF has ACKed + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_ack_pf"); + + if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) { + ret_val = E1000_SUCCESS; + hw->mbx.stats.acks++; + } + + return ret_val; +} + +/** + * e1000_check_for_rst_pf - checks to see if the VF has reset + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number) +{ + u32 vflre = E1000_READ_REG(hw, E1000_VFLRE); + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_rst_pf"); + + if (vflre & (1 << vf_number)) { + ret_val = E1000_SUCCESS; + E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number)); + hw->mbx.stats.rsts++; + } + + return ret_val; +} + +/** + * e1000_obtain_mbx_lock_pf - obtain mailbox lock + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * return SUCCESS if we obtained the mailbox lock + **/ +static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + u32 p2v_mailbox; + + DEBUGFUNC("e1000_obtain_mbx_lock_pf"); + + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + + /* reserve mailbox for vf use */ + p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) + ret_val = E1000_SUCCESS; + + return ret_val; +} + +/** + * e1000_write_mbx_pf - Places a message in the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf_number: the VF index + * + * returns SUCCESS if it successfully copied message into the buffer + **/ +static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, + u16 vf_number) +{ + s32 ret_val; + u16 i; + + DEBUGFUNC("e1000_write_mbx_pf"); + + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) + goto out_no_write; + + /* flush msg and acks as we are overwriting the message buffer */ + e1000_check_for_msg_pf(hw, vf_number); + e1000_check_for_ack_pf(hw, vf_number); + + /* copy the caller specified message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]); + + /* Interrupt VF to tell it a message has been sent and release buffer*/ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS); + + /* update stats */ + hw->mbx.stats.msgs_tx++; + +out_no_write: + return ret_val; + +} + +/** + * e1000_read_mbx_pf - Read a message from the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf_number: the VF index + * + * This function copies a message from the mailbox buffer to the caller's + * memory buffer. The presumption is that the caller knows that there was + * a message due to a VF request so no polling for message is needed. + **/ +static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, + u16 vf_number) +{ + s32 ret_val; + u16 i; + + DEBUGFUNC("e1000_read_mbx_pf"); + + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) + goto out_no_read; + + /* copy the message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i); + + /* Acknowledge the message and release buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK); + + /* update stats */ + hw->mbx.stats.msgs_rx++; + +out_no_read: + return ret_val; +} + +/** + * e1000_init_mbx_params_pf - set initial values for pf mailbox + * @hw: pointer to the HW structure + * + * Initializes the hw->mbx struct to correct values for pf mailbox + */ +s32 e1000_init_mbx_params_pf(struct e1000_hw *hw) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + + switch (hw->mac.type) { + case e1000_82576: + case e1000_i350: + case e1000_i354: + mbx->timeout = 0; + mbx->usec_delay = 0; + + mbx->size = E1000_VFMAILBOX_SIZE; + + mbx->ops.read = e1000_read_mbx_pf; + mbx->ops.write = e1000_write_mbx_pf; + mbx->ops.read_posted = e1000_read_posted_mbx; + mbx->ops.write_posted = e1000_write_posted_mbx; + mbx->ops.check_for_msg = e1000_check_for_msg_pf; + mbx->ops.check_for_ack = e1000_check_for_ack_pf; + mbx->ops.check_for_rst = e1000_check_for_rst_pf; + + mbx->stats.msgs_tx = 0; + mbx->stats.msgs_rx = 0; + mbx->stats.reqs = 0; + mbx->stats.acks = 0; + mbx->stats.rsts = 0; + default: + return E1000_SUCCESS; + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h new file mode 100644 index 00000000..bbf838c8 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h @@ -0,0 +1,87 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MBX_H_ +#define _E1000_MBX_H_ + +#include "e1000_api.h" + +#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ +#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ +#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ + +#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */ +#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ +#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */ +#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ + +#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ + +/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the + * PF. The reverse is true if it is E1000_PF_*. + * Message ACK's are the value or'd with 0xF0000000 + */ +/* Msgs below or'd with this are the ACK */ +#define E1000_VT_MSGTYPE_ACK 0x80000000 +/* Msgs below or'd with this are the NACK */ +#define E1000_VT_MSGTYPE_NACK 0x40000000 +/* Indicates that VF is still clear to send requests */ +#define E1000_VT_MSGTYPE_CTS 0x20000000 +#define E1000_VT_MSGINFO_SHIFT 16 +/* bits 23:16 are used for extra info for certain messages */ +#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) + +#define E1000_VF_RESET 0x01 /* VF requests reset */ +#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */ +#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */ +#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_MULTICAST_OVERFLOW (0x80 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */ +#define E1000_VF_SET_VLAN_ADD (0x01 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_LPE 0x05 /* reqs to set VMOLR.LPE */ +#define E1000_VF_SET_PROMISC 0x06 /* reqs to clear VMOLR.ROPE/MPME*/ +#define E1000_VF_SET_PROMISC_UNICAST (0x01 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT) + +#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ + +#define E1000_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ +#define E1000_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ + +s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_check_for_msg(struct e1000_hw *, u16); +s32 e1000_check_for_ack(struct e1000_hw *, u16); +s32 e1000_check_for_rst(struct e1000_hw *, u16); +void e1000_init_mbx_ops_generic(struct e1000_hw *hw); +s32 e1000_init_mbx_params_pf(struct e1000_hw *); + +#endif /* _E1000_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c new file mode 100644 index 00000000..6188d007 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c @@ -0,0 +1,965 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static void e1000_reload_nvm_generic(struct e1000_hw *hw); + +/** + * e1000_init_nvm_ops_generic - Initialize NVM function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_nvm_ops_generic(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + DEBUGFUNC("e1000_init_nvm_ops_generic"); + + /* Initialize function pointers */ + nvm->ops.init_params = e1000_null_ops_generic; + nvm->ops.acquire = e1000_null_ops_generic; + nvm->ops.read = e1000_null_read_nvm; + nvm->ops.release = e1000_null_nvm_generic; + nvm->ops.reload = e1000_reload_nvm_generic; + nvm->ops.update = e1000_null_ops_generic; + nvm->ops.valid_led_default = e1000_null_led_default; + nvm->ops.validate = e1000_null_ops_generic; + nvm->ops.write = e1000_null_write_nvm; +} + +/** + * e1000_null_nvm_read - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, + u16 E1000_UNUSEDARG *c) +{ + DEBUGFUNC("e1000_null_read_nvm"); + return E1000_SUCCESS; +} + +/** + * e1000_null_nvm_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_nvm_generic"); + return; +} + +/** + * e1000_null_led_default - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_null_led_default"); + return E1000_SUCCESS; +} + +/** + * e1000_null_write_nvm - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, + u16 E1000_UNUSEDARG *c) +{ + DEBUGFUNC("e1000_null_write_nvm"); + return E1000_SUCCESS; +} + +/** + * e1000_raise_eec_clk - Raise EEPROM clock + * @hw: pointer to the HW structure + * @eecd: pointer to the EEPROM + * + * Enable/Raise the EEPROM clock bit. + **/ +static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) +{ + *eecd = *eecd | E1000_EECD_SK; + E1000_WRITE_REG(hw, E1000_EECD, *eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(hw->nvm.delay_usec); +} + +/** + * e1000_lower_eec_clk - Lower EEPROM clock + * @hw: pointer to the HW structure + * @eecd: pointer to the EEPROM + * + * Clear/Lower the EEPROM clock bit. + **/ +static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) +{ + *eecd = *eecd & ~E1000_EECD_SK; + E1000_WRITE_REG(hw, E1000_EECD, *eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(hw->nvm.delay_usec); +} + +/** + * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM + * @hw: pointer to the HW structure + * @data: data to send to the EEPROM + * @count: number of bits to shift out + * + * We need to shift 'count' bits out to the EEPROM. So, the value in the + * "data" parameter will be shifted out to the EEPROM one bit at a time. + * In order to do this, "data" must be broken down into bits. + **/ +static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u32 mask; + + DEBUGFUNC("e1000_shift_out_eec_bits"); + + mask = 0x01 << (count - 1); + if (nvm->type == e1000_nvm_eeprom_spi) + eecd |= E1000_EECD_DO; + + do { + eecd &= ~E1000_EECD_DI; + + if (data & mask) + eecd |= E1000_EECD_DI; + + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + + usec_delay(nvm->delay_usec); + + e1000_raise_eec_clk(hw, &eecd); + e1000_lower_eec_clk(hw, &eecd); + + mask >>= 1; + } while (mask); + + eecd &= ~E1000_EECD_DI; + E1000_WRITE_REG(hw, E1000_EECD, eecd); +} + +/** + * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM + * @hw: pointer to the HW structure + * @count: number of bits to shift in + * + * In order to read a register from the EEPROM, we need to shift 'count' bits + * in from the EEPROM. Bits are "shifted in" by raising the clock input to + * the EEPROM (setting the SK bit), and then reading the value of the data out + * "DO" bit. During this "shifting in" process the data in "DI" bit should + * always be clear. + **/ +static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) +{ + u32 eecd; + u32 i; + u16 data; + + DEBUGFUNC("e1000_shift_in_eec_bits"); + + eecd = E1000_READ_REG(hw, E1000_EECD); + + eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); + data = 0; + + for (i = 0; i < count; i++) { + data <<= 1; + e1000_raise_eec_clk(hw, &eecd); + + eecd = E1000_READ_REG(hw, E1000_EECD); + + eecd &= ~E1000_EECD_DI; + if (eecd & E1000_EECD_DO) + data |= 1; + + e1000_lower_eec_clk(hw, &eecd); + } + + return data; +} + +/** + * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion + * @hw: pointer to the HW structure + * @ee_reg: EEPROM flag for polling + * + * Polls the EEPROM status bit for either read or write completion based + * upon the value of 'ee_reg'. + **/ +s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) +{ + u32 attempts = 100000; + u32 i, reg = 0; + + DEBUGFUNC("e1000_poll_eerd_eewr_done"); + + for (i = 0; i < attempts; i++) { + if (ee_reg == E1000_NVM_POLL_READ) + reg = E1000_READ_REG(hw, E1000_EERD); + else + reg = E1000_READ_REG(hw, E1000_EEWR); + + if (reg & E1000_NVM_RW_REG_DONE) + return E1000_SUCCESS; + + usec_delay(5); + } + + return -E1000_ERR_NVM; +} + +/** + * e1000_acquire_nvm_generic - Generic request for access to EEPROM + * @hw: pointer to the HW structure + * + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) +{ + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + s32 timeout = E1000_NVM_GRANT_ATTEMPTS; + + DEBUGFUNC("e1000_acquire_nvm_generic"); + + E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); + eecd = E1000_READ_REG(hw, E1000_EECD); + + while (timeout) { + if (eecd & E1000_EECD_GNT) + break; + usec_delay(5); + eecd = E1000_READ_REG(hw, E1000_EECD); + timeout--; + } + + if (!timeout) { + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + DEBUGOUT("Could not acquire NVM grant\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_standby_nvm - Return EEPROM to standby state + * @hw: pointer to the HW structure + * + * Return the EEPROM to a standby state. + **/ +static void e1000_standby_nvm(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + + DEBUGFUNC("e1000_standby_nvm"); + + if (nvm->type == e1000_nvm_eeprom_spi) { + /* Toggle CS to flush commands */ + eecd |= E1000_EECD_CS; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(nvm->delay_usec); + eecd &= ~E1000_EECD_CS; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(nvm->delay_usec); + } +} + +/** + * e1000_stop_nvm - Terminate EEPROM command + * @hw: pointer to the HW structure + * + * Terminates the current command by inverting the EEPROM's chip select pin. + **/ +static void e1000_stop_nvm(struct e1000_hw *hw) +{ + u32 eecd; + + DEBUGFUNC("e1000_stop_nvm"); + + eecd = E1000_READ_REG(hw, E1000_EECD); + if (hw->nvm.type == e1000_nvm_eeprom_spi) { + /* Pull CS high */ + eecd |= E1000_EECD_CS; + e1000_lower_eec_clk(hw, &eecd); + } +} + +/** + * e1000_release_nvm_generic - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit. + **/ +void e1000_release_nvm_generic(struct e1000_hw *hw) +{ + u32 eecd; + + DEBUGFUNC("e1000_release_nvm_generic"); + + e1000_stop_nvm(hw); + + eecd = E1000_READ_REG(hw, E1000_EECD); + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, E1000_EECD, eecd); +} + +/** + * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write + * @hw: pointer to the HW structure + * + * Setups the EEPROM for reading and writing. + **/ +static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u8 spi_stat_reg; + + DEBUGFUNC("e1000_ready_nvm_eeprom"); + + if (nvm->type == e1000_nvm_eeprom_spi) { + u16 timeout = NVM_MAX_RETRY_SPI; + + /* Clear SK and CS */ + eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(1); + + /* Read "Status Register" repeatedly until the LSB is cleared. + * The EEPROM will signal that the command has been completed + * by clearing bit 0 of the internal status register. If it's + * not cleared within 'timeout', then error out. + */ + while (timeout) { + e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, + hw->nvm.opcode_bits); + spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); + if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) + break; + + usec_delay(5); + e1000_standby_nvm(hw); + timeout--; + } + + if (!timeout) { + DEBUGOUT("SPI NVM Status error\n"); + return -E1000_ERR_NVM; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_nvm_spi - Read EEPROM's using SPI + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM. + **/ +s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i = 0; + s32 ret_val; + u16 word_in; + u8 read_opcode = NVM_READ_OPCODE_SPI; + + DEBUGFUNC("e1000_read_nvm_spi"); + + /* A check for invalid values: offset too large, too many words, + * and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + ret_val = nvm->ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_ready_nvm_eeprom(hw); + if (ret_val) + goto release; + + e1000_standby_nvm(hw); + + if ((nvm->address_bits == 8) && (offset >= 128)) + read_opcode |= NVM_A8_OPCODE_SPI; + + /* Send the READ command (opcode + addr) */ + e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); + e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); + + /* Read the data. SPI NVMs increment the address with each byte + * read and will roll over if reading beyond the end. This allows + * us to read the whole NVM from any offset + */ + for (i = 0; i < words; i++) { + word_in = e1000_shift_in_eec_bits(hw, 16); + data[i] = (word_in >> 8) | (word_in << 8); + } + +release: + nvm->ops.release(hw); + + return ret_val; +} + +/** + * e1000_read_nvm_eerd - Reads EEPROM using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i, eerd = 0; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_nvm_eerd"); + + /* A check for invalid values: offset too large, too many words, + * too many words for the offset, and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + for (i = 0; i < words; i++) { + eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + + E1000_NVM_RW_REG_START; + + E1000_WRITE_REG(hw, E1000_EERD, eerd); + ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); + if (ret_val) + break; + + data[i] = (E1000_READ_REG(hw, E1000_EERD) >> + E1000_NVM_RW_REG_DATA); + } + + return ret_val; +} + +/** + * e1000_write_nvm_spi - Write to EEPROM using SPI + * @hw: pointer to the HW structure + * @offset: offset within the EEPROM to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the EEPROM + * + * Writes data to EEPROM at offset using SPI interface. + * + * If e1000_update_nvm_checksum is not called after this function , the + * EEPROM will most likely contain an invalid checksum. + **/ +s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + s32 ret_val = -E1000_ERR_NVM; + u16 widx = 0; + + DEBUGFUNC("e1000_write_nvm_spi"); + + /* A check for invalid values: offset too large, too many words, + * and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + while (widx < words) { + u8 write_opcode = NVM_WRITE_OPCODE_SPI; + + ret_val = nvm->ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_ready_nvm_eeprom(hw); + if (ret_val) { + nvm->ops.release(hw); + return ret_val; + } + + e1000_standby_nvm(hw); + + /* Send the WRITE ENABLE command (8 bit opcode) */ + e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, + nvm->opcode_bits); + + e1000_standby_nvm(hw); + + /* Some SPI eeproms use the 8th address bit embedded in the + * opcode + */ + if ((nvm->address_bits == 8) && (offset >= 128)) + write_opcode |= NVM_A8_OPCODE_SPI; + + /* Send the Write command (8-bit opcode + addr) */ + e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); + e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), + nvm->address_bits); + + /* Loop to allow for up to whole page write of eeprom */ + while (widx < words) { + u16 word_out = data[widx]; + word_out = (word_out >> 8) | (word_out << 8); + e1000_shift_out_eec_bits(hw, word_out, 16); + widx++; + + if ((((offset + widx) * 2) % nvm->page_size) == 0) { + e1000_standby_nvm(hw); + break; + } + } + msec_delay(10); + nvm->ops.release(hw); + } + + return ret_val; +} + +/** + * e1000_read_pba_string_generic - Read device part number + * @hw: pointer to the HW structure + * @pba_num: pointer to device part number + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number from the EEPROM and stores + * the value in pba_num. + **/ +s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + s32 ret_val; + u16 nvm_data; + u16 pba_ptr; + u16 offset; + u16 length; + + DEBUGFUNC("e1000_read_pba_string_generic"); + + if (pba_num == NULL) { + DEBUGOUT("PBA string buffer was null\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + /* if nvm_data is not ptr guard the PBA must be in legacy format which + * means pba_ptr is actually our second data word for the PBA number + * and we can decode it into an ascii string + */ + if (nvm_data != NVM_PBA_PTR_GUARD) { + DEBUGOUT("NVM PBA number is not stored as string\n"); + + /* make sure callers buffer is big enough to store the PBA */ + if (pba_num_size < E1000_PBANUM_LENGTH) { + DEBUGOUT("PBA string buffer too small\n"); + return E1000_ERR_NO_SPACE; + } + + /* extract hex string from data and pba_ptr */ + pba_num[0] = (nvm_data >> 12) & 0xF; + pba_num[1] = (nvm_data >> 8) & 0xF; + pba_num[2] = (nvm_data >> 4) & 0xF; + pba_num[3] = nvm_data & 0xF; + pba_num[4] = (pba_ptr >> 12) & 0xF; + pba_num[5] = (pba_ptr >> 8) & 0xF; + pba_num[6] = '-'; + pba_num[7] = 0; + pba_num[8] = (pba_ptr >> 4) & 0xF; + pba_num[9] = pba_ptr & 0xF; + + /* put a null character on the end of our string */ + pba_num[10] = '\0'; + + /* switch all the data but the '-' to hex char */ + for (offset = 0; offset < 10; offset++) { + if (pba_num[offset] < 0xA) + pba_num[offset] += '0'; + else if (pba_num[offset] < 0x10) + pba_num[offset] += 'A' - 0xA; + } + + return E1000_SUCCESS; + } + + ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + DEBUGOUT("NVM PBA number section invalid length\n"); + return -E1000_ERR_NVM_PBA_SECTION; + } + /* check if pba_num buffer is big enough */ + if (pba_num_size < (((u32)length * 2) - 1)) { + DEBUGOUT("PBA string buffer too small\n"); + return -E1000_ERR_NO_SPACE; + } + + /* trim pba length from start of string */ + pba_ptr++; + length--; + + for (offset = 0; offset < length; offset++) { + ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + pba_num[offset * 2] = (u8)(nvm_data >> 8); + pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); + } + pba_num[offset * 2] = '\0'; + + return E1000_SUCCESS; +} + +/** + * e1000_read_pba_length_generic - Read device part number length + * @hw: pointer to the HW structure + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number length from the EEPROM and + * stores the value in pba_num_size. + **/ +s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size) +{ + s32 ret_val; + u16 nvm_data; + u16 pba_ptr; + u16 length; + + DEBUGFUNC("e1000_read_pba_length_generic"); + + if (pba_num_size == NULL) { + DEBUGOUT("PBA buffer size was null\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + /* if data is not ptr guard the PBA must be in legacy format */ + if (nvm_data != NVM_PBA_PTR_GUARD) { + *pba_num_size = E1000_PBANUM_LENGTH; + return E1000_SUCCESS; + } + + ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + DEBUGOUT("NVM PBA number section invalid length\n"); + return -E1000_ERR_NVM_PBA_SECTION; + } + + /* Convert from length in u16 values to u8 chars, add 1 for NULL, + * and subtract 2 because length field is included in length. + */ + *pba_num_size = ((u32)length * 2) - 1; + + return E1000_SUCCESS; +} + + + + + +/** + * e1000_read_mac_addr_generic - Read device MAC address + * @hw: pointer to the HW structure + * + * Reads the device MAC address from the EEPROM and stores the value. + * Since devices with two ports use the same EEPROM, we increment the + * last bit in the MAC address for the second port. + **/ +s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = E1000_READ_REG(hw, E1000_RAH(0)); + rar_low = E1000_READ_REG(hw, E1000_RAL(0)); + + for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); + + for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); + + for (i = 0; i < ETH_ADDR_LEN; i++) + hw->mac.addr[i] = hw->mac.perm_addr[i]; + + return E1000_SUCCESS; +} + +/** + * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_validate_nvm_checksum_generic"); + + for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + checksum += nvm_data; + } + + if (checksum != (u16) NVM_SUM) { + DEBUGOUT("NVM Checksum Invalid\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_update_nvm_checksum_generic - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. + **/ +s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum"); + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum.\n"); + return ret_val; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); + if (ret_val) + DEBUGOUT("NVM Write Error while updating checksum.\n"); + + return ret_val; +} + +/** + * e1000_reload_nvm_generic - Reloads EEPROM + * @hw: pointer to the HW structure + * + * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the + * extended control register. + **/ +static void e1000_reload_nvm_generic(struct e1000_hw *hw) +{ + u32 ctrl_ext; + + DEBUGFUNC("e1000_reload_nvm_generic"); + + usec_delay(10); + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_EE_RST; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_get_fw_version - Get firmware version information + * @hw: pointer to the HW structure + * @fw_vers: pointer to output version structure + * + * unsupported/not present features return 0 in version structure + **/ +void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) +{ + u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; + u8 q, hval, rem, result; + u16 comb_verh, comb_verl, comb_offset; + + memset(fw_vers, 0, sizeof(struct e1000_fw_version)); + + /* basic eeprom version numbers, bits used vary by part and by tool + * used to create the nvm images */ + /* Check which data format we have */ + hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); + switch (hw->mac.type) { + case e1000_i211: + e1000_read_invm_version(hw, fw_vers); + return; + case e1000_82575: + case e1000_82576: + case e1000_82580: + /* Use this format, unless EETRACK ID exists, + * then use alternate format + */ + if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); + fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) + >> NVM_MAJOR_SHIFT; + fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) + >> NVM_MINOR_SHIFT; + fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); + goto etrack_id; + } + break; + case e1000_i210: + if (!(e1000_get_flash_presence_i210(hw))) { + e1000_read_invm_version(hw, fw_vers); + return; + } + /* fall through */ + case e1000_i350: + case e1000_i354: + /* find combo image version */ + hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); + if ((comb_offset != 0x0) && + (comb_offset != NVM_VER_INVALID)) { + + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset + + 1), 1, &comb_verh); + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), + 1, &comb_verl); + + /* get Option Rom version if it exists and is valid */ + if ((comb_verh && comb_verl) && + ((comb_verh != NVM_VER_INVALID) && + (comb_verl != NVM_VER_INVALID))) { + + fw_vers->or_valid = true; + fw_vers->or_major = + comb_verl >> NVM_COMB_VER_SHFT; + fw_vers->or_build = + (comb_verl << NVM_COMB_VER_SHFT) + | (comb_verh >> NVM_COMB_VER_SHFT); + fw_vers->or_patch = + comb_verh & NVM_COMB_VER_MASK; + } + } + break; + default: + return; + } + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); + fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) + >> NVM_MAJOR_SHIFT; + + /* check for old style version format in newer images*/ + if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { + eeprom_verl = (fw_version & NVM_COMB_VER_MASK); + } else { + eeprom_verl = (fw_version & NVM_MINOR_MASK) + >> NVM_MINOR_SHIFT; + } + /* Convert minor value to hex before assigning to output struct + * Val to be converted will not be higher than 99, per tool output + */ + q = eeprom_verl / NVM_HEX_CONV; + hval = q * NVM_HEX_TENS; + rem = eeprom_verl % NVM_HEX_CONV; + result = hval + rem; + fw_vers->eep_minor = result; + +etrack_id: + if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); + hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); + fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) + | eeprom_verl; + } + return; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h new file mode 100644 index 00000000..fe62785a --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h @@ -0,0 +1,75 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_NVM_H_ +#define _E1000_NVM_H_ + + +struct e1000_fw_version { + u32 etrack_id; + u16 eep_major; + u16 eep_minor; + u16 eep_build; + + u8 invm_major; + u8 invm_minor; + u8 invm_img_type; + + bool or_valid; + u16 or_major; + u16 or_build; + u16 or_patch; +}; + + +void e1000_init_nvm_ops_generic(struct e1000_hw *hw); +s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); +void e1000_null_nvm_generic(struct e1000_hw *hw); +s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data); +s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); +s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); + +s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); +s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); +s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, + u32 pba_num_size); +s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size); +s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); +s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); +s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); +void e1000_release_nvm_generic(struct e1000_hw *hw); +void e1000_get_fw_version(struct e1000_hw *hw, + struct e1000_fw_version *fw_vers); + +#define E1000_STM_OPCODE 0xDB00 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h new file mode 100644 index 00000000..d1cf98e2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h @@ -0,0 +1,136 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* glue for the OS independent part of e1000 + * includes register access macros + */ + +#ifndef _E1000_OSDEP_H_ +#define _E1000_OSDEP_H_ + +#include +#include +#include +#include +#include +#include "kcompat.h" + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#define usec_delay(x) udelay(x) +#define usec_delay_irq(x) udelay(x) +#ifndef msec_delay +#define msec_delay(x) do { \ + /* Don't mdelay in interrupt context! */ \ + if (in_interrupt()) \ + BUG(); \ + else \ + msleep(x); \ +} while (0) + +/* Some workarounds require millisecond delays and are run during interrupt + * context. Most notably, when establishing link, the phy may need tweaking + * but cannot process phy register reads/writes faster than millisecond + * intervals...and we establish link due to a "link status change" interrupt. + */ +#define msec_delay_irq(x) mdelay(x) +#endif + +#define PCI_COMMAND_REGISTER PCI_COMMAND +#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE +#define ETH_ADDR_LEN ETH_ALEN + +#ifdef __BIG_ENDIAN +#define E1000_BIG_ENDIAN __BIG_ENDIAN +#endif + + +#ifdef DEBUG +#define DEBUGOUT(S) printk(KERN_DEBUG S) +#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S, ## A) +#else +#define DEBUGOUT(S) +#define DEBUGOUT1(S, A...) +#endif + +#ifdef DEBUG_FUNC +#define DEBUGFUNC(F) DEBUGOUT(F "\n") +#else +#define DEBUGFUNC(F) +#endif +#define DEBUGOUT2 DEBUGOUT1 +#define DEBUGOUT3 DEBUGOUT2 +#define DEBUGOUT7 DEBUGOUT3 + +#define E1000_REGISTER(a, reg) reg + +#define E1000_WRITE_REG(a, reg, value) ( \ + writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg)))) + +#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg))) + +#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ + writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))) + +#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ + readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))) + +#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY +#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY + +#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ + writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))) + +#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ + readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))) + +#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ + writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))) + +#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ + readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))) + +#define E1000_WRITE_REG_IO(a, reg, offset) do { \ + outl(reg, ((a)->io_base)); \ + outl(offset, ((a)->io_base + 4)); } while (0) + +#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) + +#define E1000_WRITE_FLASH_REG(a, reg, value) ( \ + writel((value), ((a)->flash_address + reg))) + +#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \ + writew((value), ((a)->flash_address + reg))) + +#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg)) + +#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg)) + +#endif /* _E1000_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c new file mode 100644 index 00000000..df224702 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -0,0 +1,3405 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static s32 e1000_wait_autoneg(struct e1000_hw *hw); +/* Cable length tables */ +static const u16 e1000_m88_cable_length_table[] = { + 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; +#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ + (sizeof(e1000_m88_cable_length_table) / \ + sizeof(e1000_m88_cable_length_table[0])) + +static const u16 e1000_igp_2_cable_length_table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, + 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, + 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, + 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, + 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, + 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, + 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, + 124}; +#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ + (sizeof(e1000_igp_2_cable_length_table) / \ + sizeof(e1000_igp_2_cable_length_table[0])) + +/** + * e1000_init_phy_ops_generic - Initialize PHY function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_phy_ops_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + DEBUGFUNC("e1000_init_phy_ops_generic"); + + /* Initialize function pointers */ + phy->ops.init_params = e1000_null_ops_generic; + phy->ops.acquire = e1000_null_ops_generic; + phy->ops.check_polarity = e1000_null_ops_generic; + phy->ops.check_reset_block = e1000_null_ops_generic; + phy->ops.commit = e1000_null_ops_generic; + phy->ops.force_speed_duplex = e1000_null_ops_generic; + phy->ops.get_cfg_done = e1000_null_ops_generic; + phy->ops.get_cable_length = e1000_null_ops_generic; + phy->ops.get_info = e1000_null_ops_generic; + phy->ops.set_page = e1000_null_set_page; + phy->ops.read_reg = e1000_null_read_reg; + phy->ops.read_reg_locked = e1000_null_read_reg; + phy->ops.read_reg_page = e1000_null_read_reg; + phy->ops.release = e1000_null_phy_generic; + phy->ops.reset = e1000_null_ops_generic; + phy->ops.set_d0_lplu_state = e1000_null_lplu_state; + phy->ops.set_d3_lplu_state = e1000_null_lplu_state; + phy->ops.write_reg = e1000_null_write_reg; + phy->ops.write_reg_locked = e1000_null_write_reg; + phy->ops.write_reg_page = e1000_null_write_reg; + phy->ops.power_up = e1000_null_phy_generic; + phy->ops.power_down = e1000_null_phy_generic; + phy->ops.read_i2c_byte = e1000_read_i2c_byte_null; + phy->ops.write_i2c_byte = e1000_write_i2c_byte_null; +} + +/** + * e1000_null_set_page - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_null_set_page"); + return E1000_SUCCESS; +} + +/** + * e1000_null_read_reg - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_null_read_reg"); + return E1000_SUCCESS; +} + +/** + * e1000_null_phy_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_phy_generic"); + return; +} + +/** + * e1000_null_lplu_state - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw, + bool E1000_UNUSEDARG active) +{ + DEBUGFUNC("e1000_null_lplu_state"); + return E1000_SUCCESS; +} + +/** + * e1000_null_write_reg - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_null_write_reg"); + return E1000_SUCCESS; +} + +/** + * e1000_read_i2c_byte_null - No-op function, return 0 + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: data value read + * + **/ +s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG byte_offset, + u8 E1000_UNUSEDARG dev_addr, + u8 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_read_i2c_byte_null"); + return E1000_SUCCESS; +} + +/** + * e1000_write_i2c_byte_null - No-op function, return 0 + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: data value to write + * + **/ +s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG byte_offset, + u8 E1000_UNUSEDARG dev_addr, + u8 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_write_i2c_byte_null"); + return E1000_SUCCESS; +} + +/** + * e1000_check_reset_block_generic - Check if PHY reset is blocked + * @hw: pointer to the HW structure + * + * Read the PHY management control register and check whether a PHY reset + * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise + * return E1000_BLK_PHY_RESET (12). + **/ +s32 e1000_check_reset_block_generic(struct e1000_hw *hw) +{ + u32 manc; + + DEBUGFUNC("e1000_check_reset_block"); + + manc = E1000_READ_REG(hw, E1000_MANC); + + return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? + E1000_BLK_PHY_RESET : E1000_SUCCESS; +} + +/** + * e1000_get_phy_id - Retrieve the PHY ID and revision + * @hw: pointer to the HW structure + * + * Reads the PHY registers and stores the PHY ID and possibly the PHY + * revision in the hardware structure. + **/ +s32 e1000_get_phy_id(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_id; + + DEBUGFUNC("e1000_get_phy_id"); + + if (!phy->ops.read_reg) + return E1000_SUCCESS; + + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); + if (ret_val) + return ret_val; + + phy->id = (u32)(phy_id << 16); + usec_delay(20); + ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); + if (ret_val) + return ret_val; + + phy->id |= (u32)(phy_id & PHY_REVISION_MASK); + phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); + + + return E1000_SUCCESS; +} + +/** + * e1000_phy_reset_dsp_generic - Reset PHY DSP + * @hw: pointer to the HW structure + * + * Reset the digital signal processor. + **/ +s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_phy_reset_dsp_generic"); + + if (!hw->phy.ops.write_reg) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); + if (ret_val) + return ret_val; + + return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); +} + +/** + * e1000_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + **/ +s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + + DEBUGFUNC("e1000_read_phy_reg_mdic"); + + if (offset > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = ((offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); + + E1000_WRITE_REG(hw, E1000_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usec_delay_irq(50); + mdic = E1000_READ_REG(hw, E1000_MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (mdic & E1000_MDIC_ERROR) { + DEBUGOUT("MDI Error\n"); + return -E1000_ERR_PHY; + } + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } + *data = (u16) mdic; + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_mdic - Write MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + **/ +s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + + DEBUGFUNC("e1000_write_phy_reg_mdic"); + + if (offset > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = (((u32)data) | + (offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); + + E1000_WRITE_REG(hw, E1000_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usec_delay_irq(50); + mdic = E1000_READ_REG(hw, E1000_MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (mdic & E1000_MDIC_ERROR) { + DEBUGOUT("MDI Error\n"); + return -E1000_ERR_PHY; + } + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg_i2c - Read PHY register using i2c + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the i2c interface and stores the + * retrieved information in data. + **/ +s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, i2ccmd = 0; + + DEBUGFUNC("e1000_read_phy_reg_i2c"); + + /* Set up Op-code, Phy Address, and register address in the I2CCMD + * register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + (E1000_I2CCMD_OPCODE_READ)); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) + break; + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + + /* Need to byte-swap the 16-bit value. */ + *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_i2c - Write PHY register using i2c + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset using the i2c interface. + **/ +s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, i2ccmd = 0; + u16 phy_data_swapped; + + DEBUGFUNC("e1000_write_phy_reg_i2c"); + + /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ + if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { + DEBUGOUT1("PHY I2C Address %d is out of range.\n", + hw->phy.addr); + return -E1000_ERR_CONFIG; + } + + /* Swap the data bytes for the I2C interface */ + phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); + + /* Set up Op-code, Phy Address, and register address in the I2CCMD + * register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_WRITE | + phy_data_swapped); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) + break; + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_sfp_data_byte - Reads SFP module data. + * @hw: pointer to the HW structure + * @offset: byte location offset to be read + * @data: read data buffer pointer + * + * Reads one byte from SFP module data stored + * in SFP resided EEPROM memory or SFP diagnostic area. + * Function should be called with + * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access + * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters + * access + **/ +s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) +{ + u32 i = 0; + u32 i2ccmd = 0; + u32 data_local = 0; + + DEBUGFUNC("e1000_read_sfp_data_byte"); + + if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { + DEBUGOUT("I2CCMD command address exceeds upper limit\n"); + return -E1000_ERR_PHY; + } + + /* Set up Op-code, EEPROM Address,in the I2CCMD + * register. The MAC will take care of interfacing with the + * EEPROM to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_READ); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + data_local = E1000_READ_REG(hw, E1000_I2CCMD); + if (data_local & E1000_I2CCMD_READY) + break; + } + if (!(data_local & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (data_local & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + *data = (u8) data_local & 0xFF; + + return E1000_SUCCESS; +} + +/** + * e1000_write_sfp_data_byte - Writes SFP module data. + * @hw: pointer to the HW structure + * @offset: byte location offset to write to + * @data: data to write + * + * Writes one byte to SFP module data stored + * in SFP resided EEPROM memory or SFP diagnostic area. + * Function should be called with + * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access + * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters + * access + **/ +s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data) +{ + u32 i = 0; + u32 i2ccmd = 0; + u32 data_local = 0; + + DEBUGFUNC("e1000_write_sfp_data_byte"); + + if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { + DEBUGOUT("I2CCMD command address exceeds upper limit\n"); + return -E1000_ERR_PHY; + } + /* The programming interface is 16 bits wide + * so we need to read the whole word first + * then update appropriate byte lane and write + * the updated word back. + */ + /* Set up Op-code, EEPROM Address,in the I2CCMD + * register. The MAC will take care of interfacing + * with an EEPROM to write the data given. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_READ); + /* Set a command to read single word */ + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + /* Poll the ready bit to see if lastly + * launched I2C operation completed + */ + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) { + /* Check if this is READ or WRITE phase */ + if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) == + E1000_I2CCMD_OPCODE_READ) { + /* Write the selected byte + * lane and update whole word + */ + data_local = i2ccmd & 0xFF00; + data_local |= data; + i2ccmd = ((offset << + E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_WRITE | data_local); + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + } else { + break; + } + } + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg_m88 - Read m88 PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore, if necessary, then reads the PHY register at offset + * and storing the retrieved information in data. Release any acquired + * semaphores before exiting. + **/ +s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_read_phy_reg_m88"); + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, + data); + + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_write_phy_reg_m88 - Write m88 PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_write_phy_reg_m88"); + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, + data); + + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_set_page_igp - Set page as on IGP-like PHY(s) + * @hw: pointer to the HW structure + * @page: page to set (shifted left when necessary) + * + * Sets PHY page required for PHY register access. Assumes semaphore is + * already acquired. Note, this function sets phy.addr to 1 so the caller + * must set it appropriately (if necessary) after this function returns. + **/ +s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) +{ + DEBUGFUNC("e1000_set_page_igp"); + + DEBUGOUT1("Setting page 0x%x\n", page); + + hw->phy.addr = 1; + + return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); +} + +/** + * __e1000_read_phy_reg_igp - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary, then reads the PHY register at offset + * and stores the retrieved information in data. Release any acquired + * semaphores before exiting. + **/ +static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, + bool locked) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_read_phy_reg_igp"); + + if (!locked) { + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + if (offset > MAX_PHY_MULTI_PAGE_REG) + ret_val = e1000_write_phy_reg_mdic(hw, + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); + if (!ret_val) + ret_val = e1000_read_phy_reg_mdic(hw, + MAX_PHY_REG_ADDRESS & offset, + data); + if (!locked) + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_read_phy_reg_igp - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore then reads the PHY register at offset and stores the + * retrieved information in data. + * Release the acquired semaphore before exiting. + **/ +s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_phy_reg_igp(hw, offset, data, false); +} + +/** + * e1000_read_phy_reg_igp_locked - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset and stores the retrieved information + * in data. Assumes semaphore already acquired. + **/ +s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_phy_reg_igp(hw, offset, data, true); +} + +/** + * e1000_write_phy_reg_igp - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, + bool locked) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_phy_reg_igp"); + + if (!locked) { + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + if (offset > MAX_PHY_MULTI_PAGE_REG) + ret_val = e1000_write_phy_reg_mdic(hw, + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); + if (!ret_val) + ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & + offset, + data); + if (!locked) + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_write_phy_reg_igp - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_phy_reg_igp(hw, offset, data, false); +} + +/** + * e1000_write_phy_reg_igp_locked - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset. + * Assumes semaphore already acquired. + **/ +s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_phy_reg_igp(hw, offset, data, true); +} + +/** + * __e1000_read_kmrn_reg - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary. Then reads the PHY register at offset + * using the kumeran interface. The information retrieved is stored in data. + * Release any acquired semaphores before exiting. + **/ +static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, + bool locked) +{ + u32 kmrnctrlsta; + + DEBUGFUNC("__e1000_read_kmrn_reg"); + + if (!locked) { + s32 ret_val = E1000_SUCCESS; + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & + E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; + E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); + E1000_WRITE_FLUSH(hw); + + usec_delay(2); + + kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); + *data = (u16)kmrnctrlsta; + + if (!locked) + hw->phy.ops.release(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_kmrn_reg_generic - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore then reads the PHY register at offset using the + * kumeran interface. The information retrieved is stored in data. + * Release the acquired semaphore before exiting. + **/ +s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_kmrn_reg(hw, offset, data, false); +} + +/** + * e1000_read_kmrn_reg_locked - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the kumeran interface. The + * information retrieved is stored in data. + * Assumes semaphore already acquired. + **/ +s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_kmrn_reg(hw, offset, data, true); +} + +/** + * __e1000_write_kmrn_reg - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary. Then write the data to PHY register + * at the offset using the kumeran interface. Release any acquired semaphores + * before exiting. + **/ +static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, + bool locked) +{ + u32 kmrnctrlsta; + + DEBUGFUNC("e1000_write_kmrn_reg_generic"); + + if (!locked) { + s32 ret_val = E1000_SUCCESS; + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & + E1000_KMRNCTRLSTA_OFFSET) | data; + E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); + E1000_WRITE_FLUSH(hw); + + usec_delay(2); + + if (!locked) + hw->phy.ops.release(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_write_kmrn_reg_generic - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore then writes the data to the PHY register at the offset + * using the kumeran interface. Release the acquired semaphore before exiting. + **/ +s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_kmrn_reg(hw, offset, data, false); +} + +/** + * e1000_write_kmrn_reg_locked - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Write the data to PHY register at the offset using the kumeran interface. + * Assumes semaphore already acquired. + **/ +s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_kmrn_reg(hw, offset, data, true); +} + +/** + * e1000_set_master_slave_mode - Setup PHY for Master/slave mode + * @hw: pointer to the HW structure + * + * Sets up Master/slave mode + **/ +static s32 e1000_set_master_slave_mode(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_data; + + /* Resolve Master/Slave mode */ + ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* load defaults for future use */ + hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? + ((phy_data & CR_1000T_MS_VALUE) ? + e1000_ms_force_master : + e1000_ms_force_slave) : e1000_ms_auto; + + switch (hw->phy.ms_type) { + case e1000_ms_force_master: + phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); + break; + case e1000_ms_force_slave: + phy_data |= CR_1000T_MS_ENABLE; + phy_data &= ~(CR_1000T_MS_VALUE); + break; + case e1000_ms_auto: + phy_data &= ~CR_1000T_MS_ENABLE; + /* fall-through */ + default: + break; + } + + return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); +} + +/** + * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link + * @hw: pointer to the HW structure + * + * Sets up Carrier-sense on Transmit and downshift values. + **/ +s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_82577"); + + if (hw->phy.reset_disable) + return E1000_SUCCESS; + + if (hw->phy.type == e1000_phy_82580) { + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + return ret_val; + } + } + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; + + /* Enable downshift */ + phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; + + ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data); + if (ret_val) + return ret_val; + + /* Set MDI/MDIX mode */ + ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; + /* Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data); + if (ret_val) + return ret_val; + + return e1000_set_master_slave_mode(hw); +} + +/** + * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock + * and downshift values are set also. + **/ +s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_m88"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + + switch (phy->mdix) { + case 1: + phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; + break; + case 2: + phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; + break; + case 3: + phy_data |= M88E1000_PSCR_AUTO_X_1000T; + break; + case 0: + default: + phy_data |= M88E1000_PSCR_AUTO_X_MODE; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled + */ + phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; + if (phy->disable_polarity_correction) + phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; + + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + if (phy->revision < E1000_REVISION_4) { + /* Force TX_CLK in the Extended PHY Specific Control Register + * to 25MHz clock. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_EPSCR_TX_CLK_25; + + if ((phy->revision == E1000_REVISION_2) && + (phy->id == M88E1111_I_PHY_ID)) { + /* 82573L PHY - set the downshift counter to 5x. */ + phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; + phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; + } else { + /* Configure Master and Slave downshift values */ + phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); + phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); + } + ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + /* Commit the changes. */ + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + + return E1000_SUCCESS; +} + +/** + * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. + * Also enables and sets the downshift parameters. + **/ +s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_m88_gen2"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + + switch (phy->mdix) { + case 1: + phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; + break; + case 2: + phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; + break; + case 3: + /* M88E1112 does not support this mode) */ + if (phy->id != M88E1112_E_PHY_ID) { + phy_data |= M88E1000_PSCR_AUTO_X_1000T; + break; + } + case 0: + default: + phy_data |= M88E1000_PSCR_AUTO_X_MODE; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled + */ + phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; + if (phy->disable_polarity_correction) + phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; + + /* Enable downshift and setting it to X6 */ + if (phy->id == M88E1543_E_PHY_ID) { + phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; + ret_val = + phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + } + + phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; + phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; + phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; + + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + /* Commit the changes. */ + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + + ret_val = e1000_set_master_slave_mode(hw); + if (ret_val) + return ret_val; + + return E1000_SUCCESS; +} + +/** + * e1000_copper_link_setup_igp - Setup igp PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for + * igp PHY's. + **/ +s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_copper_link_setup_igp"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + return ret_val; + } + + /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid + * timeout issues when LFS is enabled. + */ + msec_delay(100); + + /* disable lplu d0 during driver init */ + if (hw->phy.ops.set_d0_lplu_state) { + ret_val = hw->phy.ops.set_d0_lplu_state(hw, false); + if (ret_val) { + DEBUGOUT("Error Disabling LPLU D0\n"); + return ret_val; + } + } + /* Configure mdi-mdix settings */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCR_AUTO_MDIX; + + switch (phy->mdix) { + case 1: + data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; + break; + case 2: + data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; + break; + case 0: + default: + data |= IGP01E1000_PSCR_AUTO_MDIX; + break; + } + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); + if (ret_val) + return ret_val; + + /* set auto-master slave resolution settings */ + if (hw->mac.autoneg) { + /* when autonegotiation advertisement is only 1000Mbps then we + * should disable SmartSpeed and enable Auto MasterSlave + * resolution as hardware default. + */ + if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { + /* Disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + + /* Set auto Master/Slave resolution process */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); + if (ret_val) + return ret_val; + + data &= ~CR_1000T_MS_ENABLE; + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); + if (ret_val) + return ret_val; + } + + ret_val = e1000_set_master_slave_mode(hw); + } + + return ret_val; +} + +/** + * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation + * @hw: pointer to the HW structure + * + * Reads the MII auto-neg advertisement register and/or the 1000T control + * register and if the PHY is already setup for auto-negotiation, then + * return successful. Otherwise, setup advertisement and flow control to + * the appropriate values for the wanted auto-negotiation. + **/ +static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 mii_autoneg_adv_reg; + u16 mii_1000t_ctrl_reg = 0; + + DEBUGFUNC("e1000_phy_setup_autoneg"); + + phy->autoneg_advertised &= phy->autoneg_mask; + + /* Read the MII Auto-Neg Advertisement Register (Address 4). */ + ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) { + /* Read the MII 1000Base-T Control Register (Address 9). */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, + &mii_1000t_ctrl_reg); + if (ret_val) + return ret_val; + } + + /* Need to parse both autoneg_advertised and fc and set up + * the appropriate PHY registers. First we will parse for + * autoneg_advertised software override. Since we can advertise + * a plethora of combinations, we need to check each bit + * individually. + */ + + /* First we clear all the 10/100 mb speed bits in the Auto-Neg + * Advertisement Register (Address 4) and the 1000 mb speed bits in + * the 1000Base-T Control Register (Address 9). + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | + NWAY_AR_100TX_HD_CAPS | + NWAY_AR_10T_FD_CAPS | + NWAY_AR_10T_HD_CAPS); + mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); + + DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); + + /* Do we want to advertise 10 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_HALF) { + DEBUGOUT("Advertise 10mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; + } + + /* Do we want to advertise 10 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_FULL) { + DEBUGOUT("Advertise 10mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; + } + + /* Do we want to advertise 100 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_HALF) { + DEBUGOUT("Advertise 100mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; + } + + /* Do we want to advertise 100 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_FULL) { + DEBUGOUT("Advertise 100mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; + } + + /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_1000_HALF) + DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); + + /* Do we want to advertise 1000 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { + DEBUGOUT("Advertise 1000mb Full duplex\n"); + mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; + } + + /* Check for a software override of the flow control settings, and + * setup the PHY advertisement registers accordingly. If + * auto-negotiation is enabled, then software will have to set the + * "PAUSE" bits to the correct value in the Auto-Negotiation + * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- + * negotiation. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * but we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: No software override. The flow control configuration + * in the EEPROM is used. + */ + switch (hw->fc.current_mode) { + case e1000_fc_none: + /* Flow control (Rx & Tx) is completely disabled by a + * software over-ride. + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_rx_pause: + /* Rx Flow control is enabled, and Tx Flow control is + * disabled, by a software over-ride. + * + * Since there really isn't a way to advertise that we are + * capable of Rx Pause ONLY, we will advertise that we + * support both symmetric and asymmetric Rx PAUSE. Later + * (in e1000_config_fc_after_link_up) we will disable the + * hw's ability to send PAUSE frames. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled, by a software over-ride. + */ + mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; + mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; + break; + case e1000_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, + mii_1000t_ctrl_reg); + + return ret_val; +} + +/** + * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link + * @hw: pointer to the HW structure + * + * Performs initial bounds checking on autoneg advertisement parameter, then + * configure to advertise the full capability. Setup the PHY to autoneg + * and restart the negotiation process between the link partner. If + * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. + **/ +static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_ctrl; + + DEBUGFUNC("e1000_copper_link_autoneg"); + + /* Perform some bounds checking on the autoneg advertisement + * parameter. + */ + phy->autoneg_advertised &= phy->autoneg_mask; + + /* If autoneg_advertised is zero, we assume it was not defaulted + * by the calling code so we set to advertise full capability. + */ + if (!phy->autoneg_advertised) + phy->autoneg_advertised = phy->autoneg_mask; + + DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); + ret_val = e1000_phy_setup_autoneg(hw); + if (ret_val) { + DEBUGOUT("Error Setting up Auto-Negotiation\n"); + return ret_val; + } + DEBUGOUT("Restarting Auto-Neg\n"); + + /* Restart auto-negotiation by setting the Auto Neg Enable bit and + * the Auto Neg Restart bit in the PHY control register. + */ + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); + if (ret_val) + return ret_val; + + phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); + if (ret_val) + return ret_val; + + /* Does the user want to wait for Auto-Neg to complete here, or + * check at a later time (for example, callback routine). + */ + if (phy->autoneg_wait_to_complete) { + ret_val = e1000_wait_autoneg(hw); + if (ret_val) { + DEBUGOUT("Error while waiting for autoneg to complete\n"); + return ret_val; + } + } + + hw->mac.get_link_status = true; + + return ret_val; +} + +/** + * e1000_setup_copper_link_generic - Configure copper link settings + * @hw: pointer to the HW structure + * + * Calls the appropriate function to configure the link for auto-neg or forced + * speed and duplex. Then we check for link, once link is established calls + * to configure collision distance and flow control are called. If link is + * not established, we return -E1000_ERR_PHY (-2). + **/ +s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) +{ + s32 ret_val; + bool link; + + DEBUGFUNC("e1000_setup_copper_link_generic"); + + if (hw->mac.autoneg) { + /* Setup autoneg and flow control advertisement and perform + * autonegotiation. + */ + ret_val = e1000_copper_link_autoneg(hw); + if (ret_val) + return ret_val; + } else { + /* PHY will be set to 10H, 10F, 100H or 100F + * depending on user settings. + */ + DEBUGOUT("Forcing Speed and Duplex\n"); + ret_val = hw->phy.ops.force_speed_duplex(hw); + if (ret_val) { + DEBUGOUT("Error Forcing Speed and Duplex\n"); + return ret_val; + } + } + + /* Check link status. Wait up to 100 microseconds for link to become + * valid. + */ + ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, + &link); + if (ret_val) + return ret_val; + + if (link) { + DEBUGOUT("Valid link established!!!\n"); + hw->mac.ops.config_collision_dist(hw); + ret_val = e1000_config_fc_after_link_up_generic(hw); + } else { + DEBUGOUT("Unable to establish link!!!\n"); + } + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. Clears the + * auto-crossover to force MDI manually. Waits for link and returns + * successful if link up is successful, else -E1000_ERR_PHY (-2). + **/ +s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + /* Clear Auto-Crossover to force MDI manually. IGP requires MDI + * forced whenever speed and duplex are forced. + */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; + phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; + + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); + if (ret_val) + return ret_val; + + DEBUGOUT1("IGP PSCR: %X\n", phy_data); + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + } + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. Clears the + * auto-crossover to force MDI manually. Resets the PHY to commit the + * changes. If time expires while waiting for link up, we reset the DSP. + * After reset, TX_CLK and CRS on Tx must be set. Return successful upon + * successful completion, else return corresponding error code. + **/ +s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); + + /* I210 and I211 devices support Auto-Crossover in forced operation. */ + if (phy->type != e1000_phy_i210) { + /* Clear Auto-Crossover to force MDI manually. M88E1000 + * requires MDI forced whenever speed and duplex are forced. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + /* Reset the phy to commit changes. */ + ret_val = hw->phy.ops.commit(hw); + if (ret_val) + return ret_val; + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) { + bool reset_dsp = true; + + switch (hw->phy.id) { + case I347AT4_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1543_E_PHY_ID: + case I210_I_PHY_ID: + reset_dsp = false; + break; + default: + if (hw->phy.type != e1000_phy_m88) + reset_dsp = false; + break; + } + + if (!reset_dsp) { + DEBUGOUT("Link taking longer than expected.\n"); + } else { + /* We didn't get link. + * Reset the DSP and cross our fingers. + */ + ret_val = phy->ops.write_reg(hw, + M88E1000_PHY_PAGE_SELECT, + 0x001d); + if (ret_val) + return ret_val; + ret_val = e1000_phy_reset_dsp_generic(hw); + if (ret_val) + return ret_val; + } + } + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + } + + if (hw->phy.type != e1000_phy_m88) + return E1000_SUCCESS; + + if (hw->phy.id == I347AT4_E_PHY_ID || + hw->phy.id == M88E1340M_E_PHY_ID || + hw->phy.id == M88E1112_E_PHY_ID) + return E1000_SUCCESS; + if (hw->phy.id == I210_I_PHY_ID) + return E1000_SUCCESS; + if ((hw->phy.id == M88E1543_E_PHY_ID)) + return E1000_SUCCESS; + ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* Resetting the phy means we need to re-force TX_CLK in the + * Extended PHY Specific Control Register to 25MHz clock from + * the reset value of 2.5MHz. + */ + phy_data |= M88E1000_EPSCR_TX_CLK_25; + ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + /* In addition, we must re-enable CRS on Tx for both half and full + * duplex. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex + * @hw: pointer to the HW structure + * + * Forces the speed and duplex settings of the PHY. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data); + if (ret_val) + return ret_val; + + /* Disable MDI-X support for 10/100 */ + ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); + if (ret_val) + return ret_val; + + data &= ~IFE_PMC_AUTO_MDIX; + data &= ~IFE_PMC_FORCE_MDIX; + + ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data); + if (ret_val) + return ret_val; + + DEBUGOUT1("IFE PMC: %X\n", data); + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + } + + return E1000_SUCCESS; +} + +/** + * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex + * @hw: pointer to the HW structure + * @phy_ctrl: pointer to current value of PHY_CONTROL + * + * Forces speed and duplex on the PHY by doing the following: disable flow + * control, force speed/duplex on the MAC, disable auto speed detection, + * disable auto-negotiation, configure duplex, configure speed, configure + * the collision distance, write configuration to CTRL register. The + * caller must write to the PHY_CONTROL register for these settings to + * take affect. + **/ +void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 ctrl; + + DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); + + /* Turn off flow control when forcing speed/duplex */ + hw->fc.current_mode = e1000_fc_none; + + /* Force speed/duplex on the mac */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + ctrl &= ~E1000_CTRL_SPD_SEL; + + /* Disable Auto Speed Detection */ + ctrl &= ~E1000_CTRL_ASDE; + + /* Disable autoneg on the phy */ + *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; + + /* Forcing Full or Half Duplex? */ + if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { + ctrl &= ~E1000_CTRL_FD; + *phy_ctrl &= ~MII_CR_FULL_DUPLEX; + DEBUGOUT("Half Duplex\n"); + } else { + ctrl |= E1000_CTRL_FD; + *phy_ctrl |= MII_CR_FULL_DUPLEX; + DEBUGOUT("Full Duplex\n"); + } + + /* Forcing 10mb or 100mb? */ + if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { + ctrl |= E1000_CTRL_SPD_100; + *phy_ctrl |= MII_CR_SPEED_100; + *phy_ctrl &= ~MII_CR_SPEED_1000; + DEBUGOUT("Forcing 100mb\n"); + } else { + ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); + *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); + DEBUGOUT("Forcing 10mb\n"); + } + + hw->mac.ops.config_collision_dist(hw); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); +} + +/** + * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. + **/ +s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_set_d3_lplu_state_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); + if (ret_val) + return ret_val; + + if (!active) { + data &= ~IGP02E1000_PM_D3_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + return ret_val; + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data |= IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + } else if (phy->smart_speed == e1000_smart_speed_off) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + } + } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || + (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || + (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { + data |= IGP02E1000_PM_D3_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + return ret_val; + + /* When LPLU is enabled, we should disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + data); + } + + return ret_val; +} + +/** + * e1000_check_downshift_generic - Checks whether a downshift in speed occurred + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns 1 + * + * A downshift is detected by querying the PHY link health. + **/ +s32 e1000_check_downshift_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, offset, mask; + + DEBUGFUNC("e1000_check_downshift_generic"); + + switch (phy->type) { + case e1000_phy_i210: + case e1000_phy_m88: + case e1000_phy_gg82563: + offset = M88E1000_PHY_SPEC_STATUS; + mask = M88E1000_PSSR_DOWNSHIFT; + break; + case e1000_phy_igp_2: + case e1000_phy_igp_3: + offset = IGP01E1000_PHY_LINK_HEALTH; + mask = IGP01E1000_PLHR_SS_DOWNGRADE; + break; + default: + /* speed downshift not supported */ + phy->speed_downgraded = false; + return E1000_SUCCESS; + } + + ret_val = phy->ops.read_reg(hw, offset, &phy_data); + + if (!ret_val) + phy->speed_downgraded = !!(phy_data & mask); + + return ret_val; +} + +/** + * e1000_check_polarity_m88 - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY specific status register. + **/ +s32 e1000_check_polarity_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_check_polarity_m88"); + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); + + if (!ret_val) + phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_check_polarity_igp - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY port status register, and the + * current speed (since there is no polarity at 100Mbps). + **/ +s32 e1000_check_polarity_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data, offset, mask; + + DEBUGFUNC("e1000_check_polarity_igp"); + + /* Polarity is determined based on the speed of + * our connection. + */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); + if (ret_val) + return ret_val; + + if ((data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { + offset = IGP01E1000_PHY_PCS_INIT_REG; + mask = IGP01E1000_PHY_POLARITY_MASK; + } else { + /* This really only applies to 10Mbps since + * there is no polarity for 100Mbps (always 0). + */ + offset = IGP01E1000_PHY_PORT_STATUS; + mask = IGP01E1000_PSSR_POLARITY_REVERSED; + } + + ret_val = phy->ops.read_reg(hw, offset, &data); + + if (!ret_val) + phy->cable_polarity = ((data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_check_polarity_ife - Check cable polarity for IFE PHY + * @hw: pointer to the HW structure + * + * Polarity is determined on the polarity reversal feature being enabled. + **/ +s32 e1000_check_polarity_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, offset, mask; + + DEBUGFUNC("e1000_check_polarity_ife"); + + /* Polarity is determined based on the reversal feature being enabled. + */ + if (phy->polarity_correction) { + offset = IFE_PHY_EXTENDED_STATUS_CONTROL; + mask = IFE_PESC_POLARITY_REVERSED; + } else { + offset = IFE_PHY_SPECIAL_CONTROL; + mask = IFE_PSC_FORCE_POLARITY; + } + + ret_val = phy->ops.read_reg(hw, offset, &phy_data); + + if (!ret_val) + phy->cable_polarity = ((phy_data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_wait_autoneg - Wait for auto-neg completion + * @hw: pointer to the HW structure + * + * Waits for auto-negotiation to complete or for the auto-negotiation time + * limit to expire, which ever happens first. + **/ +static s32 e1000_wait_autoneg(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 i, phy_status; + + DEBUGFUNC("e1000_wait_autoneg"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ + for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + if (phy_status & MII_SR_AUTONEG_COMPLETE) + break; + msec_delay(100); + } + + /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation + * has completed. + */ + return ret_val; +} + +/** + * e1000_phy_has_link_generic - Polls PHY for link + * @hw: pointer to the HW structure + * @iterations: number of times to poll for link + * @usec_interval: delay between polling attempts + * @success: pointer to whether polling was successful or not + * + * Polls the PHY status register for link, 'iterations' number of times. + **/ +s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + u32 usec_interval, bool *success) +{ + s32 ret_val = E1000_SUCCESS; + u16 i, phy_status; + + DEBUGFUNC("e1000_phy_has_link_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + for (i = 0; i < iterations; i++) { + /* Some PHYs require the PHY_STATUS register to be read + * twice due to the link bit being sticky. No harm doing + * it across the board. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + /* If the first read fails, another entity may have + * ownership of the resources, wait and try again to + * see if they have relinquished the resources yet. + */ + usec_delay(usec_interval); + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + if (phy_status & MII_SR_LINK_STATUS) + break; + if (usec_interval >= 1000) + msec_delay_irq(usec_interval/1000); + else + usec_delay(usec_interval); + } + + *success = (i < iterations); + + return ret_val; +} + +/** + * e1000_get_cable_length_m88 - Determine cable length for m88 PHY + * @hw: pointer to the HW structure + * + * Reads the PHY specific status register to retrieve the cable length + * information. The cable length is determined by averaging the minimum and + * maximum values to get the "average" cable length. The m88 PHY has four + * possible cable length values, which are: + * Register Value Cable Length + * 0 < 50 meters + * 1 50 - 80 meters + * 2 80 - 110 meters + * 3 110 - 140 meters + * 4 > 140 meters + **/ +s32 e1000_get_cable_length_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, index; + + DEBUGFUNC("e1000_get_cable_length_m88"); + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); + if (ret_val) + return ret_val; + + index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT); + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; + + phy->min_cable_length = e1000_m88_cable_length_table[index]; + phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; + + phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; + + return E1000_SUCCESS; +} + +s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, phy_data2, is_cm; + u16 index, default_page; + + DEBUGFUNC("e1000_get_cable_length_m88_gen2"); + + switch (hw->phy.id) { + case I210_I_PHY_ID: + /* Get cable length from PHY Cable Diagnostics Control Reg */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + (I347AT4_PCDL + phy->addr), + &phy_data); + if (ret_val) + return ret_val; + + /* Check if the unit of cable length is meters or cm */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + I347AT4_PCDC, &phy_data2); + if (ret_val) + return ret_val; + + is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + + /* Populate the phy structure with cable length in meters */ + phy->min_cable_length = phy_data / (is_cm ? 100 : 1); + phy->max_cable_length = phy_data / (is_cm ? 100 : 1); + phy->cable_length = phy_data / (is_cm ? 100 : 1); + break; + case M88E1543_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case I347AT4_E_PHY_ID: + /* Remember the original page select and set it to 7 */ + ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, + &default_page); + if (ret_val) + return ret_val; + + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); + if (ret_val) + return ret_val; + + /* Get cable length from PHY Cable Diagnostics Control Reg */ + ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), + &phy_data); + if (ret_val) + return ret_val; + + /* Check if the unit of cable length is meters or cm */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); + if (ret_val) + return ret_val; + + is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + + /* Populate the phy structure with cable length in meters */ + phy->min_cable_length = phy_data / (is_cm ? 100 : 1); + phy->max_cable_length = phy_data / (is_cm ? 100 : 1); + phy->cable_length = phy_data / (is_cm ? 100 : 1); + + /* Reset the page select to its original value */ + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, + default_page); + if (ret_val) + return ret_val; + break; + + case M88E1112_E_PHY_ID: + /* Remember the original page select and set it to 5 */ + ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, + &default_page); + if (ret_val) + return ret_val; + + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, + &phy_data); + if (ret_val) + return ret_val; + + index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT; + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; + + phy->min_cable_length = e1000_m88_cable_length_table[index]; + phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; + + phy->cable_length = (phy->min_cable_length + + phy->max_cable_length) / 2; + + /* Reset the page select to its original value */ + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, + default_page); + if (ret_val) + return ret_val; + + break; + default: + return -E1000_ERR_PHY; + } + + return ret_val; +} + +/** + * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY + * @hw: pointer to the HW structure + * + * The automatic gain control (agc) normalizes the amplitude of the + * received signal, adjusting for the attenuation produced by the + * cable. By reading the AGC registers, which represent the + * combination of coarse and fine gain value, the value can be put + * into a lookup table to obtain the approximate cable length + * for each channel. + **/ +s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, i, agc_value = 0; + u16 cur_agc_index, max_agc_index = 0; + u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; + static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { + IGP02E1000_PHY_AGC_A, + IGP02E1000_PHY_AGC_B, + IGP02E1000_PHY_AGC_C, + IGP02E1000_PHY_AGC_D + }; + + DEBUGFUNC("e1000_get_cable_length_igp_2"); + + /* Read the AGC registers for all channels */ + for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { + ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); + if (ret_val) + return ret_val; + + /* Getting bits 15:9, which represent the combination of + * coarse and fine gain values. The result is a number + * that can be put into the lookup table to obtain the + * approximate cable length. + */ + cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & + IGP02E1000_AGC_LENGTH_MASK); + + /* Array index bound check. */ + if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || + (cur_agc_index == 0)) + return -E1000_ERR_PHY; + + /* Remove min & max AGC values from calculation. */ + if (e1000_igp_2_cable_length_table[min_agc_index] > + e1000_igp_2_cable_length_table[cur_agc_index]) + min_agc_index = cur_agc_index; + if (e1000_igp_2_cable_length_table[max_agc_index] < + e1000_igp_2_cable_length_table[cur_agc_index]) + max_agc_index = cur_agc_index; + + agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; + } + + agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + + e1000_igp_2_cable_length_table[max_agc_index]); + agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); + + /* Calculate cable length with the error range of +/- 10 meters. */ + phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ? + (agc_value - IGP02E1000_AGC_RANGE) : 0); + phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; + + phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_info_m88 - Retrieve PHY information + * @hw: pointer to the HW structure + * + * Valid for only copper links. Read the PHY status register (sticky read) + * to verify that link is up. Read the PHY special control register to + * determine the polarity and 10base-T extended distance. Read the PHY + * special status register to determine MDI/MDIx and current speed. If + * speed is 1000, then determine cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_m88"); + + if (phy->media_type != e1000_media_type_copper) { + DEBUGOUT("Phy info is only valid for copper media\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy->polarity_correction = !!(phy_data & + M88E1000_PSCR_POLARITY_REVERSAL); + + ret_val = e1000_check_polarity_m88(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX); + + if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { + ret_val = hw->phy.ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); + if (ret_val) + return ret_val; + + phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + /* Set values to "undefined" */ + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return ret_val; +} + +/** + * e1000_get_phy_info_igp - Retrieve igp PHY information + * @hw: pointer to the HW structure + * + * Read PHY status to determine if link is up. If link is up, then + * set/determine 10base-T extended distance and polarity correction. Read + * PHY port status to determine MDI/MDIx and speed. Based on the speed, + * determine on the cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_igp"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + phy->polarity_correction = true; + + ret_val = e1000_check_polarity_igp(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX); + + if ((data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { + ret_val = phy->ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); + if (ret_val) + return ret_val; + + phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return ret_val; +} + +/** + * e1000_get_phy_info_ife - Retrieves various IFE PHY states + * @hw: pointer to the HW structure + * + * Populates "phy" structure with various feature states. + **/ +s32 e1000_get_phy_info_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_ife"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); + if (ret_val) + return ret_val; + phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE); + + if (phy->polarity_correction) { + ret_val = e1000_check_polarity_ife(hw); + if (ret_val) + return ret_val; + } else { + /* Polarity is forced */ + phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + } + + ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS); + + /* The following parameters are undefined for 10/100 operation. */ + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + + return E1000_SUCCESS; +} + +/** + * e1000_phy_sw_reset_generic - PHY software reset + * @hw: pointer to the HW structure + * + * Does a software reset of the PHY by reading the PHY control register and + * setting/write the control register reset bit to the PHY. + **/ +s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_ctrl; + + DEBUGFUNC("e1000_phy_sw_reset_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); + if (ret_val) + return ret_val; + + phy_ctrl |= MII_CR_RESET; + ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); + if (ret_val) + return ret_val; + + usec_delay(1); + + return ret_val; +} + +/** + * e1000_phy_hw_reset_generic - PHY hardware reset + * @hw: pointer to the HW structure + * + * Verify the reset block is not blocking us from resetting. Acquire + * semaphore (if necessary) and read/set/write the device control reset + * bit in the PHY. Wait the appropriate delay time for the device to + * reset and release the semaphore (if necessary). + **/ +s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u32 ctrl; + + DEBUGFUNC("e1000_phy_hw_reset_generic"); + + if (phy->ops.check_reset_block) { + ret_val = phy->ops.check_reset_block(hw); + if (ret_val) + return E1000_SUCCESS; + } + + ret_val = phy->ops.acquire(hw); + if (ret_val) + return ret_val; + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); + E1000_WRITE_FLUSH(hw); + + usec_delay(phy->reset_delay_us); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + usec_delay(150); + + phy->ops.release(hw); + + return phy->ops.get_cfg_done(hw); +} + +/** + * e1000_get_cfg_done_generic - Generic configuration done + * @hw: pointer to the HW structure + * + * Generic function to wait 10 milli-seconds for configuration to complete + * and return success. + **/ +s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_get_cfg_done_generic"); + + msec_delay_irq(10); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_init_script_igp3 - Inits the IGP3 PHY + * @hw: pointer to the HW structure + * + * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. + **/ +s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) +{ + DEBUGOUT("Running IGP 3 PHY init script\n"); + + /* PHY init IGP 3 */ + /* Enable rise/fall, 10-mode work in class-A */ + hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); + /* Remove all caps from Replica path filter */ + hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); + /* Bias trimming for ADC, AFE and Driver (Default) */ + hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); + /* Increase Hybrid poly bias */ + hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); + /* Add 4% to Tx amplitude in Gig mode */ + hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); + /* Disable trimming (TTT) */ + hw->phy.ops.write_reg(hw, 0x2011, 0x0000); + /* Poly DC correction to 94.6% + 2% for all channels */ + hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); + /* ABS DC correction to 95.9% */ + hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); + /* BG temp curve trim */ + hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); + /* Increasing ADC OPAMP stage 1 currents to max */ + hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); + /* Force 1000 ( required for enabling PHY regs configuration) */ + hw->phy.ops.write_reg(hw, 0x0000, 0x0140); + /* Set upd_freq to 6 */ + hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); + /* Disable NPDFE */ + hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); + /* Disable adaptive fixed FFE (Default) */ + hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); + /* Enable FFE hysteresis */ + hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); + /* Fixed FFE for short cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); + /* Fixed FFE for medium cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); + /* Fixed FFE for long cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); + /* Enable Adaptive Clip Threshold */ + hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); + /* AHT reset limit to 1 */ + hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); + /* Set AHT master delay to 127 msec */ + hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); + /* Set scan bits for AHT */ + hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); + /* Set AHT Preset bits */ + hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); + /* Change integ_factor of channel A to 3 */ + hw->phy.ops.write_reg(hw, 0x1895, 0x0003); + /* Change prop_factor of channels BCD to 8 */ + hw->phy.ops.write_reg(hw, 0x1796, 0x0008); + /* Change cg_icount + enable integbp for channels BCD */ + hw->phy.ops.write_reg(hw, 0x1798, 0xD008); + /* Change cg_icount + enable integbp + change prop_factor_master + * to 8 for channel A + */ + hw->phy.ops.write_reg(hw, 0x1898, 0xD918); + /* Disable AHT in Slave mode on channel A */ + hw->phy.ops.write_reg(hw, 0x187A, 0x0800); + /* Enable LPLU and disable AN to 1000 in non-D0a states, + * Enable SPD+B2B + */ + hw->phy.ops.write_reg(hw, 0x0019, 0x008D); + /* Enable restart AN on an1000_dis change */ + hw->phy.ops.write_reg(hw, 0x001B, 0x2080); + /* Enable wh_fifo read clock in 10/100 modes */ + hw->phy.ops.write_reg(hw, 0x0014, 0x0045); + /* Restart AN, Speed selection is 1000 */ + hw->phy.ops.write_reg(hw, 0x0000, 0x1340); + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_type_from_id - Get PHY type from id + * @phy_id: phy_id read from the phy + * + * Returns the phy type from the id. + **/ +enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) +{ + enum e1000_phy_type phy_type = e1000_phy_unknown; + + switch (phy_id) { + case M88E1000_I_PHY_ID: + case M88E1000_E_PHY_ID: + case M88E1111_I_PHY_ID: + case M88E1011_I_PHY_ID: + case M88E1543_E_PHY_ID: + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + phy_type = e1000_phy_m88; + break; + case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ + phy_type = e1000_phy_igp_2; + break; + case GG82563_E_PHY_ID: + phy_type = e1000_phy_gg82563; + break; + case IGP03E1000_E_PHY_ID: + phy_type = e1000_phy_igp_3; + break; + case IFE_E_PHY_ID: + case IFE_PLUS_E_PHY_ID: + case IFE_C_E_PHY_ID: + phy_type = e1000_phy_ife; + break; + case I82580_I_PHY_ID: + phy_type = e1000_phy_82580; + break; + case I210_I_PHY_ID: + phy_type = e1000_phy_i210; + break; + default: + phy_type = e1000_phy_unknown; + break; + } + return phy_type; +} + +/** + * e1000_determine_phy_address - Determines PHY address. + * @hw: pointer to the HW structure + * + * This uses a trial and error method to loop through possible PHY + * addresses. It tests each by reading the PHY ID registers and + * checking for a match. + **/ +s32 e1000_determine_phy_address(struct e1000_hw *hw) +{ + u32 phy_addr = 0; + u32 i; + enum e1000_phy_type phy_type = e1000_phy_unknown; + + hw->phy.id = phy_type; + + for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { + hw->phy.addr = phy_addr; + i = 0; + + do { + e1000_get_phy_id(hw); + phy_type = e1000_get_phy_type_from_id(hw->phy.id); + + /* If phy_type is valid, break - we found our + * PHY address + */ + if (phy_type != e1000_phy_unknown) + return E1000_SUCCESS; + + msec_delay(1); + i++; + } while (i < 10); + } + + return -E1000_ERR_PHY_TYPE; +} + +/** + * e1000_power_up_phy_copper - Restore copper link in case of PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, restore the link to previous + * settings. + **/ +void e1000_power_up_phy_copper(struct e1000_hw *hw) +{ + u16 mii_reg = 0; + u16 power_reg = 0; + + /* The PHY will retain its settings across a power down/up cycle */ + hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); + mii_reg &= ~MII_CR_POWER_DOWN; + if (hw->phy.type == e1000_phy_i210) { + hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); + power_reg &= ~GS40G_CS_POWER_DOWN; + hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); + } + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); +} + +/** + * e1000_power_down_phy_copper - Restore copper link in case of PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, restore the link to previous + * settings. + **/ +void e1000_power_down_phy_copper(struct e1000_hw *hw) +{ + u16 mii_reg = 0; + u16 power_reg = 0; + + /* The PHY will retain its settings across a power down/up cycle */ + hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); + mii_reg |= MII_CR_POWER_DOWN; + /* i210 Phy requires an additional bit for power up/down */ + if (hw->phy.type == e1000_phy_i210) { + hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); + power_reg |= GS40G_CS_POWER_DOWN; + hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); + } + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); + msec_delay(1); +} + +/** + * e1000_check_polarity_82577 - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY specific status register. + **/ +s32 e1000_check_polarity_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_check_polarity_82577"); + + ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); + + if (!ret_val) + phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. + **/ +s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_82577"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + } + + return ret_val; +} + +/** + * e1000_get_phy_info_82577 - Retrieve I82577 PHY information + * @hw: pointer to the HW structure + * + * Read PHY status to determine if link is up. If link is up, then + * set/determine 10base-T extended distance and polarity correction. Read + * PHY port status to determine MDI/MDIx and speed. Based on the speed, + * determine on the cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_82577"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + phy->polarity_correction = true; + + ret_val = e1000_check_polarity_82577(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX); + + if ((data & I82577_PHY_STATUS2_SPEED_MASK) == + I82577_PHY_STATUS2_SPEED_1000MBPS) { + ret_val = hw->phy.ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); + if (ret_val) + return ret_val; + + phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY + * @hw: pointer to the HW structure + * + * Reads the diagnostic status register and verifies result is valid before + * placing it in the phy_cable_length field. + **/ +s32 e1000_get_cable_length_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, length; + + DEBUGFUNC("e1000_get_cable_length_82577"); + + ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data); + if (ret_val) + return ret_val; + + length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> + I82577_DSTATUS_CABLE_LENGTH_SHIFT); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + return -E1000_ERR_PHY; + + phy->cable_length = length; + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_gs40g - Write GS40G PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + u16 page = offset >> GS40G_PAGE_SHIFT; + + DEBUGFUNC("e1000_write_phy_reg_gs40g"); + + offset = offset & GS40G_OFFSET_MASK; + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); + if (ret_val) + goto release; + ret_val = e1000_write_phy_reg_mdic(hw, offset, data); + +release: + hw->phy.ops.release(hw); + return ret_val; +} + +/** + * e1000_read_phy_reg_gs40g - Read GS40G PHY register + * @hw: pointer to the HW structure + * @offset: lower half is register offset to read to + * upper half is page to use. + * @data: data to read at register offset + * + * Acquires semaphore, if necessary, then reads the data in the PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + u16 page = offset >> GS40G_PAGE_SHIFT; + + DEBUGFUNC("e1000_read_phy_reg_gs40g"); + + offset = offset & GS40G_OFFSET_MASK; + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); + if (ret_val) + goto release; + ret_val = e1000_read_phy_reg_mdic(hw, offset, data); + +release: + hw->phy.ops.release(hw); + return ret_val; +} + +/** + * e1000_read_phy_reg_mphy - Read mPHY control register + * @hw: pointer to the HW structure + * @address: address to be read + * @data: pointer to the read data + * + * Reads the mPHY control register in the PHY at offset and stores the + * information read to data. + **/ +s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) +{ + u32 mphy_ctrl = 0; + bool locked = false; + bool ready = false; + + DEBUGFUNC("e1000_read_phy_reg_mphy"); + + /* Check if mPHY is ready to read/write operations */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* Check if mPHY access is disabled and enable it if so */ + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { + locked = true; + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + mphy_ctrl |= E1000_MPHY_ENA_ACCESS; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + } + + /* Set the address that we want to read */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* We mask address, because we want to use only current lane */ + mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK & + ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) | + (address & E1000_MPHY_ADDRESS_MASK); + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + + /* Read data from the address */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + *data = E1000_READ_REG(hw, E1000_MPHY_DATA); + + /* Disable access to mPHY if it was originally disabled */ + if (locked) + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, + E1000_MPHY_DIS_ACCESS); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_mphy - Write mPHY control register + * @hw: pointer to the HW structure + * @address: address to write to + * @data: data to write to register at offset + * @line_override: used when we want to use different line than default one + * + * Writes data to mPHY control register. + **/ +s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, + bool line_override) +{ + u32 mphy_ctrl = 0; + bool locked = false; + bool ready = false; + + DEBUGFUNC("e1000_write_phy_reg_mphy"); + + /* Check if mPHY is ready to read/write operations */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* Check if mPHY access is disabled and enable it if so */ + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { + locked = true; + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + mphy_ctrl |= E1000_MPHY_ENA_ACCESS; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + } + + /* Set the address that we want to read */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* We mask address, because we want to use only current lane */ + if (line_override) + mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE; + else + mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE; + mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) | + (address & E1000_MPHY_ADDRESS_MASK); + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + + /* Read data from the address */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); + + /* Disable access to mPHY if it was originally disabled */ + if (locked) + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, + E1000_MPHY_DIS_ACCESS); + + return E1000_SUCCESS; +} + +/** + * e1000_is_mphy_ready - Check if mPHY control register is not busy + * @hw: pointer to the HW structure + * + * Returns mPHY control register status. + **/ +bool e1000_is_mphy_ready(struct e1000_hw *hw) +{ + u16 retry_count = 0; + u32 mphy_ctrl = 0; + bool ready = false; + + while (retry_count < 2) { + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_BUSY) { + usec_delay(20); + retry_count++; + continue; + } + ready = true; + break; + } + + if (!ready) + DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n"); + + return ready; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h new file mode 100644 index 00000000..5387c5e7 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h @@ -0,0 +1,256 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_PHY_H_ +#define _E1000_PHY_H_ + +void e1000_init_phy_ops_generic(struct e1000_hw *hw); +s32 e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data); +void e1000_null_phy_generic(struct e1000_hw *hw); +s32 e1000_null_lplu_state(struct e1000_hw *hw, bool active); +s32 e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_null_set_page(struct e1000_hw *hw, u16 data); +s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +s32 e1000_check_downshift_generic(struct e1000_hw *hw); +s32 e1000_check_polarity_m88(struct e1000_hw *hw); +s32 e1000_check_polarity_igp(struct e1000_hw *hw); +s32 e1000_check_polarity_ife(struct e1000_hw *hw); +s32 e1000_check_reset_block_generic(struct e1000_hw *hw); +s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); +s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); +s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); +s32 e1000_get_cable_length_m88(struct e1000_hw *hw); +s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw); +s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); +s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); +s32 e1000_get_phy_id(struct e1000_hw *hw); +s32 e1000_get_phy_info_igp(struct e1000_hw *hw); +s32 e1000_get_phy_info_m88(struct e1000_hw *hw); +s32 e1000_get_phy_info_ife(struct e1000_hw *hw); +s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw); +void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); +s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw); +s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw); +s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page); +s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active); +s32 e1000_setup_copper_link_generic(struct e1000_hw *hw); +s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + u32 usec_interval, bool *success); +s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); +enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); +s32 e1000_determine_phy_address(struct e1000_hw *hw); +s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); +s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); +void e1000_power_up_phy_copper(struct e1000_hw *hw); +void e1000_power_down_phy_copper(struct e1000_hw *hw); +s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); +s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data); +s32 e1000_copper_link_setup_82577(struct e1000_hw *hw); +s32 e1000_check_polarity_82577(struct e1000_hw *hw); +s32 e1000_get_phy_info_82577(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw); +s32 e1000_get_cable_length_82577(struct e1000_hw *hw); +s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data); +s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, + bool line_override); +bool e1000_is_mphy_ready(struct e1000_hw *hw); + +#define E1000_MAX_PHY_ADDR 8 + +/* IGP01E1000 Specific Registers */ +#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ +#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ +#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ +#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ +#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ +#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ +#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ +#define IGP_PAGE_SHIFT 5 +#define PHY_REG_MASK 0x1F + +/* GS40G - I210 PHY defines */ +#define GS40G_PAGE_SELECT 0x16 +#define GS40G_PAGE_SHIFT 16 +#define GS40G_OFFSET_MASK 0xFFFF +#define GS40G_PAGE_2 0x20000 +#define GS40G_MAC_REG2 0x15 +#define GS40G_MAC_LB 0x4140 +#define GS40G_MAC_SPEED_1G 0X0006 +#define GS40G_COPPER_SPEC 0x0010 +#define GS40G_CS_POWER_DOWN 0x0002 + +#define HV_INTC_FC_PAGE_START 768 +#define I82578_ADDR_REG 29 +#define I82577_ADDR_REG 16 +#define I82577_CFG_REG 22 +#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15) +#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift */ +#define I82577_CTRL_REG 23 + +/* 82577 specific PHY registers */ +#define I82577_PHY_CTRL_2 18 +#define I82577_PHY_LBK_CTRL 19 +#define I82577_PHY_STATUS_2 26 +#define I82577_PHY_DIAG_STATUS 31 + +/* I82577 PHY Status 2 */ +#define I82577_PHY_STATUS2_REV_POLARITY 0x0400 +#define I82577_PHY_STATUS2_MDIX 0x0800 +#define I82577_PHY_STATUS2_SPEED_MASK 0x0300 +#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 + +/* I82577 PHY Control 2 */ +#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 + +/* I82577 PHY Diagnostics Status */ +#define I82577_DSTATUS_CABLE_LENGTH 0x03FC +#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2 + +/* 82580 PHY Power Management */ +#define E1000_82580_PHY_POWER_MGMT 0xE14 +#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */ +#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */ +#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */ +#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */ + +#define E1000_MPHY_DIS_ACCESS 0x80000000 /* disable_access bit */ +#define E1000_MPHY_ENA_ACCESS 0x40000000 /* enable_access bit */ +#define E1000_MPHY_BUSY 0x00010000 /* busy bit */ +#define E1000_MPHY_ADDRESS_FNC_OVERRIDE 0x20000000 /* fnc_override bit */ +#define E1000_MPHY_ADDRESS_MASK 0x0000FFFF /* address mask */ + +#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 +#define IGP01E1000_PHY_POLARITY_MASK 0x0078 + +#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 +#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ + +#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 + +#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ +#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ +#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ + +#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 + +#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 +#define IGP01E1000_PSSR_MDIX 0x0800 +#define IGP01E1000_PSSR_SPEED_MASK 0xC000 +#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 + +#define IGP02E1000_PHY_CHANNEL_NUM 4 +#define IGP02E1000_PHY_AGC_A 0x11B1 +#define IGP02E1000_PHY_AGC_B 0x12B1 +#define IGP02E1000_PHY_AGC_C 0x14B1 +#define IGP02E1000_PHY_AGC_D 0x18B1 + +#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course=15:13, Fine=12:9 */ +#define IGP02E1000_AGC_LENGTH_MASK 0x7F +#define IGP02E1000_AGC_RANGE 15 + +#define E1000_CABLE_LENGTH_UNDEFINED 0xFF + +#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 +#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 +#define E1000_KMRNCTRLSTA_REN 0x00200000 +#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ +#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ +#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ +#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */ +#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ + +#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 +#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Ctrl */ +#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Ctrl */ +#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ + +/* IFE PHY Extended Status Control */ +#define IFE_PESC_POLARITY_REVERSED 0x0100 + +/* IFE PHY Special Control */ +#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 +#define IFE_PSC_FORCE_POLARITY 0x0020 + +/* IFE PHY Special Control and LED Control */ +#define IFE_PSCL_PROBE_MODE 0x0020 +#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ +#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ + +/* IFE PHY MDIX Control */ +#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ +#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ +#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto, 0=disable */ + +/* SFP modules ID memory locations */ +#define E1000_SFF_IDENTIFIER_OFFSET 0x00 +#define E1000_SFF_IDENTIFIER_SFF 0x02 +#define E1000_SFF_IDENTIFIER_SFP 0x03 + +#define E1000_SFF_ETH_FLAGS_OFFSET 0x06 +/* Flags for SFP modules compatible with ETH up to 1Gb */ +struct sfp_e1000_flags { + u8 e1000_base_sx:1; + u8 e1000_base_lx:1; + u8 e1000_base_cx:1; + u8 e1000_base_t:1; + u8 e100_base_lx:1; + u8 e100_base_fx:1; + u8 e10_base_bx10:1; + u8 e10_base_px:1; +}; + +/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ +#define E1000_SFF_VENDOR_OUI_TYCO 0x00407600 +#define E1000_SFF_VENDOR_OUI_FTL 0x00906500 +#define E1000_SFF_VENDOR_OUI_AVAGO 0x00176A00 +#define E1000_SFF_VENDOR_OUI_INTEL 0x001B2100 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h new file mode 100644 index 00000000..0e083c54 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h @@ -0,0 +1,646 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_REGS_H_ +#define _E1000_REGS_H_ + +#define E1000_CTRL 0x00000 /* Device Control - RW */ +#define E1000_STATUS 0x00008 /* Device Status - RO */ +#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define E1000_EERD 0x00014 /* EEPROM Read - RW */ +#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define E1000_FLA 0x0001C /* Flash Access - RW */ +#define E1000_MDIC 0x00020 /* MDI Control - RW */ +#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ +#define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */ +#define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */ +#define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */ +#define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */ +#define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */ +#define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */ +#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ +#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ +#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ +#define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */ +#define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */ +#define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/ +#define E1000_SCTL 0x00024 /* SerDes Control - RW */ +#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ +#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ +#define E1000_FCT 0x00030 /* Flow Control Type - RW */ +#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ +#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ +#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ +#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ +#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ +#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ +#define E1000_RCTL 0x00100 /* Rx Control - RW */ +#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ +#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ +#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ +#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) +#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ +#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ +#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ +#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ +#define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */ +#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ +#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ +#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ +#define E1000_TCTL 0x00400 /* Tx Control - RW */ +#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ +#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ +#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ +#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ +#define E1000_LEDMUX 0x08130 /* LED MUX Control */ +#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ +#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ +#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ +#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ +#define E1000_PBS 0x01008 /* Packet Buffer Size */ +#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ +#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ +#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ +#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ +#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ +#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ +#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ +#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ +#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ +#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ +#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ +#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ +#define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */ +#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ +#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ +#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ +#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ +#define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */ +#define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */ +#define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */ +#define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */ +#define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */ +#define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */ +#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ +#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ +#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ +#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ +#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ +#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ +#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ +#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ +#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ +/* Split and Replication Rx Control - RW */ +#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */ +#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ +#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ +#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ +#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ +#define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */ +#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ +#define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */ +#define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */ +#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ +#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ +#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ +#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ +#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ +#define E1000_I210_FLMNGCTL 0x12038 +#define E1000_I210_FLMNGDATA 0x1203C +#define E1000_I210_FLMNGCNT 0x12040 + +#define E1000_I210_FLSWCTL 0x12048 +#define E1000_I210_FLSWDATA 0x1204C +#define E1000_I210_FLSWCNT 0x12050 + +#define E1000_I210_FLA 0x1201C + +#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) +#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ + +/* QAV Tx mode control register */ +#define E1000_I210_TQAVCTRL 0x3570 + +/* QAV Tx mode control register bitfields masks */ +/* QAV enable */ +#define E1000_TQAVCTRL_MODE (1 << 0) +/* Fetching arbitration type */ +#define E1000_TQAVCTRL_FETCH_ARB (1 << 4) +/* Fetching timer enable */ +#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5) +/* Launch arbitration type */ +#define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8) +/* Launch timer enable */ +#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9) +/* SP waits for SR enable */ +#define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10) +/* Fetching timer correction */ +#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16 +#define E1000_TQAVCTRL_FETCH_TIMER_DELTA \ + (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET) + +/* High credit registers where _n can be 0 or 1. */ +#define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n)) + +/* Queues fetch arbitration priority control register */ +#define E1000_I210_TQAVARBCTRL 0x3574 +/* Queues priority masks where _n and _p can be 0-3. */ +#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n)) +/* QAV Tx mode control registers where _n can be 0 or 1. */ +#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n)) + +/* QAV Tx mode control register bitfields masks */ +#define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */ +#define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */ +#define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */ + +/* Good transmitted packets counter registers */ +#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n))) + +/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */ +#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n)) + +#define E1000_MMDAC 13 /* MMD Access Control */ +#define E1000_MMDAAD 14 /* MMD Access Address/Data */ + +/* Convenience macros + * + * Note: "_n" is the queue number of the register to be written to. + * + * Example usage: + * E1000_RDBAL_REG(current_rx_queue) + */ +#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \ + (0x0C000 + ((_n) * 0x40))) +#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \ + (0x0C004 + ((_n) * 0x40))) +#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \ + (0x0C008 + ((_n) * 0x40))) +#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \ + (0x0C00C + ((_n) * 0x40))) +#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \ + (0x0C010 + ((_n) * 0x40))) +#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \ + (0x0C014 + ((_n) * 0x40))) +#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) +#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \ + (0x0C018 + ((_n) * 0x40))) +#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \ + (0x0C028 + ((_n) * 0x40))) +#define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \ + (0x0C030 + ((_n) * 0x40))) +#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \ + (0x0E000 + ((_n) * 0x40))) +#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \ + (0x0E004 + ((_n) * 0x40))) +#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \ + (0x0E008 + ((_n) * 0x40))) +#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \ + (0x0E010 + ((_n) * 0x40))) +#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ + (0x0E014 + ((_n) * 0x40))) +#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) +#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \ + (0x0E018 + ((_n) * 0x40))) +#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \ + (0x0E028 + ((_n) * 0x40))) +#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \ + (0x0E038 + ((_n) * 0x40))) +#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \ + (0x0E03C + ((_n) * 0x40))) +#define E1000_TARC(_n) (0x03840 + ((_n) * 0x100)) +#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ +#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ +#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) +#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ + (0x054E0 + ((_i - 16) * 8))) +#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ + (0x054E4 + ((_i - 16) * 8))) +#define E1000_SHRAL(_i) (0x05438 + ((_i) * 8)) +#define E1000_SHRAH(_i) (0x0543C + ((_i) * 8)) +#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) +#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) +#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) +#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) +#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) +#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) +#define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */ +#define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */ +#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ +/* Same as TXPBS, renamed for newer Si - RW */ +#define E1000_ITPBS 0x03404 +#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ +#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ +#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ +#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ +#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ +#define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */ +#define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */ +#define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */ +#define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */ +#define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */ +#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ +#define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */ +#define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */ +/* DMA Tx Max Total Allow Size Reqs - RW */ +#define E1000_DTXMXSZRQ 0x03540 +#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ +#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ +#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ +#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define E1000_COLC 0x04028 /* Collision Count - R/clr */ +#define E1000_DC 0x04030 /* Defer Count - R/clr */ +#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ +#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ +#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ +#define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ +#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ +#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ +#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ +#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ +#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ +#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ +#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ +#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ +#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ +#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ +#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ +#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ +#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ +#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ +#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ +#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ +#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ +#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ +#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ +#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ +#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ +#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ +#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ +#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ +#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ +#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ +#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ +#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ +#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ +#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ +#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ +#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ +#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ +#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ +#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ +#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ +#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ +#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ +#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ +#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ +#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */ +#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */ +#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ +#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ +#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ +#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ +#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */ +#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ + +/* Virtualization statistical counters */ +#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n))) +#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n))) +#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n))) +#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n))) +#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n))) +#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n))) +#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n))) +#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n))) +#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n))) + +/* LinkSec */ +#define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */ +#define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */ +#define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */ +#define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx Octets Cnt */ +#define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */ +#define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */ +#define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */ +#define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */ +#define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */ +#define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */ +#define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */ +#define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */ +#define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */ +#define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */ +#define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */ +#define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */ +#define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */ +#define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */ +#define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */ +#define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */ +#define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */ +#define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */ +#define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */ +#define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */ +#define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */ +#define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */ +#define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */ +#define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */ +#define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */ +#define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */ +/* LinkSec Tx 128-bit Key 0 - WO */ +#define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n))) +/* LinkSec Tx 128-bit Key 1 - WO */ +#define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n))) +#define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */ +#define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */ +/* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit + * key - RW. + */ +#define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m))) + +#define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */ +#define E1000_IPSCTRL 0xB430 /* IpSec Control Register */ +#define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */ +#define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */ +/* IPSec Rx IPv4/v6 Address - RW */ +#define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n))) +/* IPSec Rx 128-bit Key - RW */ +#define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n))) +#define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */ +#define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */ +/* IPSec Tx 128-bit Key - RW */ +#define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n))) +#define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */ +#define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */ +#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ +#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ +#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ +#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ +#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ +#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ +#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ +#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ +#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ +#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ +#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ +#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ +#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ +#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ +#define E1000_LENERRS 0x04138 /* Length Errors Count */ +#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ +#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ +#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ +#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ +#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ +#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */ +#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ +#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ +#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ +#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ +#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ +#define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */ +#define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */ +#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ +#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ +#define E1000_WUC 0x05800 /* Wakeup Control - RW */ +#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define E1000_WUS 0x05810 /* Wakeup Status - RO */ +#define E1000_MANC 0x05820 /* Management Control - RW */ +#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ +#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ +#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ +#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ +#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ +#define E1000_HOST_IF 0x08800 /* Host Interface */ +#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ +#define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */ +/* Flexible Host Filter Table */ +#define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100)) +/* Ext Flexible Host Filter Table */ +#define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100)) + + +#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ +#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +/* Management Decision Filters */ +#define E1000_MDEF(_n) (0x05890 + (4 * (_n))) +#define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ +#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ +#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ +#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ +#define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */ +#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ +#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ +#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ +#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ +#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ +#define E1000_SWSM 0x05B50 /* SW Semaphore */ +#define E1000_FWSM 0x05B54 /* FW Semaphore */ +/* Driver-only SW semaphore (not used by BOOT agents) */ +#define E1000_SWSM2 0x05B58 +#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ +#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ +#define E1000_UFUSE 0x05B78 /* UFUSE - RO */ +#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ +#define E1000_HICR 0x08F00 /* Host Interface Control */ +#define E1000_FWSTS 0x08F0C /* FW Status */ + +/* RSS registers */ +#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ +#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ +#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ +#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ +#define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */ +#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */ +#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */ +#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */ +#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ +#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ +/* VT Registers */ +#define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */ +#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ +#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ +#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ +#define E1000_VFRE 0x00C8C /* VF Receive Enables */ +#define E1000_VFTE 0x00C90 /* VF Transmit Enables */ +#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ +#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ +#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ +#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ +#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ +#define E1000_IOVTCL 0x05BBC /* IOV Control Register */ +#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */ +#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */ +#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */ +#define E1000_MDFB 0x03558 /* Malicious Driver free block */ +#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ +#define E1000_TXSWC 0x05ACC /* Tx Switch Control */ +#define E1000_SCCRL 0x05DB0 /* Storm Control Control */ +#define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */ +#define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */ +/* These act per VF so an array friendly macro is used */ +#define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n))) +#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) +#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) +#define E1000_VFVMBMEM(_n) (0x00800 + (_n)) +#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) +/* VLAN Virtual Machine Filter - RW */ +#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) +#define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) +#define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */ +#define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */ +#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ +#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ +#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ +#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ +#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ +#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ +#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ +#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ +#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ + +/* Filtering Registers */ +#define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ +#define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ +#define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */ +#define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ +#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */ +#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ +#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ + +#define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */ +#define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */ +#define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */ +#define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */ +#define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */ +/* Tx Desc plane TC Rate-scheduler config */ +#define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4)) +/* Tx Packet plane TC Rate-Scheduler Config */ +#define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler Config */ +#define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4)) +/* Tx Desc Plane TC Rate-Scheduler Status */ +#define E1000_RTTDTCRS(_n) (0x3630 + ((_n) * 4)) +/* Tx Desc Plane TC Rate-Scheduler MMW */ +#define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4)) +/* Tx Packet plane TC Rate-Scheduler Status */ +#define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4)) +/* Tx Packet plane TC Rate-scheduler MMW */ +#define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler Status */ +#define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler MMW */ +#define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4)) +/* Tx Desc plane VM Rate-Scheduler MMW*/ +#define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4)) +/* Tx BCN Rate-Scheduler MMW */ +#define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4)) +#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */ +#define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */ +#define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */ +#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */ +#define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */ +#define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */ +#define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */ +#define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */ +#define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */ +#define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */ +#define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */ +#define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */ +#define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */ +#define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */ + +/* DMA Coalescing registers */ +#define E1000_DMACR 0x02508 /* Control Register */ +#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ +#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ +#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ +#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ +#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ +#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ + +/* PCIe Parity Status Register */ +#define E1000_PCIEERRSTS 0x05BA8 + +#define E1000_PROXYS 0x5F64 /* Proxying Status */ +#define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */ +/* Thermal sensor configuration and status registers */ +#define E1000_THMJT 0x08100 /* Junction Temperature */ +#define E1000_THLOWTC 0x08104 /* Low Threshold Control */ +#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ +#define E1000_THHIGHTC 0x0810C /* High Threshold Control */ +#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ + +/* Energy Efficient Ethernet "EEE" registers */ +#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ +#define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ +#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/ +#define E1000_EEE_SU 0x0E34 /* EEE Setup */ +#define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */ +#define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */ + +/* OS2BMC Registers */ +#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ +#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ +#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ +#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ + + + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h new file mode 100644 index 00000000..e5554ca3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h @@ -0,0 +1,859 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* Linux PRO/1000 Ethernet Driver main header file */ + +#ifndef _IGB_H_ +#define _IGB_H_ + +#include + +#ifndef IGB_NO_LRO +#include +#endif + +#undef HAVE_HW_TIME_STAMP +#ifdef HAVE_HW_TIME_STAMP +#include +#include +#include + +#endif +#ifdef SIOCETHTOOL +#include +#endif + +struct igb_adapter; + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +//#define IGB_DCA +#endif +#ifdef IGB_DCA +#include +#endif + +#include "kcompat.h" + +#ifdef HAVE_SCTP +#include +#endif + +#include "e1000_api.h" +#include "e1000_82575.h" +#include "e1000_manage.h" +#include "e1000_mbx.h" + +#define IGB_ERR(args...) printk(KERN_ERR "igb: " args) + +#define PFX "igb: " +#define DPRINTK(nlevel, klevel, fmt, args...) \ + (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ + printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ + __FUNCTION__ , ## args)) + +#ifdef HAVE_PTP_1588_CLOCK +#include +#include +#include +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef HAVE_I2C_SUPPORT +#include +#include +#endif /* HAVE_I2C_SUPPORT */ + +/* Interrupt defines */ +#define IGB_START_ITR 648 /* ~6000 ints/sec */ +#define IGB_4K_ITR 980 +#define IGB_20K_ITR 196 +#define IGB_70K_ITR 56 + +/* Interrupt modes, as used by the IntMode parameter */ +#define IGB_INT_MODE_LEGACY 0 +#define IGB_INT_MODE_MSI 1 +#define IGB_INT_MODE_MSIX 2 + +/* TX/RX descriptor defines */ +#define IGB_DEFAULT_TXD 256 +#define IGB_DEFAULT_TX_WORK 128 +#define IGB_MIN_TXD 80 +#define IGB_MAX_TXD 4096 + +#define IGB_DEFAULT_RXD 256 +#define IGB_MIN_RXD 80 +#define IGB_MAX_RXD 4096 + +#define IGB_MIN_ITR_USECS 10 /* 100k irq/sec */ +#define IGB_MAX_ITR_USECS 8191 /* 120 irq/sec */ + +#define NON_Q_VECTORS 1 +#define MAX_Q_VECTORS 10 + +/* Transmit and receive queues */ +#define IGB_MAX_RX_QUEUES 16 +#define IGB_MAX_TX_QUEUES 16 + +#define IGB_MAX_VF_MC_ENTRIES 30 +#define IGB_MAX_VF_FUNCTIONS 8 +#define IGB_82576_VF_DEV_ID 0x10CA +#define IGB_I350_VF_DEV_ID 0x1520 +#define IGB_MAX_UTA_ENTRIES 128 +#define MAX_EMULATION_MAC_ADDRS 16 +#define OUI_LEN 3 +#define IGB_MAX_VMDQ_QUEUES 8 + + +struct vf_data_storage { + unsigned char vf_mac_addresses[ETH_ALEN]; + u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; + u16 num_vf_mc_hashes; + u16 default_vf_vlan_id; + u16 vlans_enabled; + unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN]; + u32 uta_table_copy[IGB_MAX_UTA_ENTRIES]; + u32 flags; + unsigned long last_nack; +#ifdef IFLA_VF_MAX + u16 pf_vlan; /* When set, guest VLAN config not allowed. */ + u16 pf_qos; + u16 tx_rate; +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + bool spoofchk_enabled; +#endif +#endif +}; + +#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ +#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */ +#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */ +#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */ + +/* RX descriptor control thresholds. + * PTHRESH - MAC will consider prefetch if it has fewer than this number of + * descriptors available in its onboard memory. + * Setting this to 0 disables RX descriptor prefetch. + * HTHRESH - MAC will only prefetch if there are at least this many descriptors + * available in host memory. + * If PTHRESH is 0, this should also be 0. + * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back + * descriptors until either it has this many to write back, or the + * ITR timer expires. + */ +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) +#define IGB_RX_HTHRESH 8 +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + adapter->msix_entries) ? 1 : 4) + +/* this is the size past which hardware will drop packets when setting LPE=0 */ +#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 + +/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we + * reserve 2 more, and skb_shared_info adds an additional 384 more, + * this adds roughly 448 bytes of extra data meaning the smallest + * allocation we could have is 1K. + * i.e. RXBUFFER_512 --> size-1024 slab + */ +/* Supported Rx Buffer Sizes */ +#define IGB_RXBUFFER_256 256 +#define IGB_RXBUFFER_2048 2048 +#define IGB_RXBUFFER_16384 16384 +#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 +#if MAX_SKB_FRAGS < 8 +#define IGB_RX_BUFSZ ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024) +#else +#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 +#endif + + +/* Packet Buffer allocations */ +#define IGB_PBA_BYTES_SHIFT 0xA +#define IGB_TX_HEAD_ADDR_SHIFT 7 +#define IGB_PBA_TX_MASK 0xFFFF0000 + +#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */ + +/* How many Rx Buffers do we bundle into one write to the hardware ? */ +#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +#define IGB_EEPROM_APME 0x0400 +#define AUTO_ALL_MODES 0 + +#ifndef IGB_MASTER_SLAVE +/* Switch to override PHY master/slave setting */ +#define IGB_MASTER_SLAVE e1000_ms_hw_default +#endif + +#define IGB_MNG_VLAN_NONE -1 + +#ifndef IGB_NO_LRO +#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/ +struct igb_lro_stats { + u32 flushed; + u32 coal; +}; + +/* + * igb_lro_header - header format to be aggregated by LRO + * @iph: IP header without options + * @tcp: TCP header + * @ts: Optional TCP timestamp data in TCP options + * + * This structure relies on the check above that verifies that the header + * is IPv4 and does not contain any options. + */ +struct igb_lrohdr { + struct iphdr iph; + struct tcphdr th; + __be32 ts[0]; +}; + +struct igb_lro_list { + struct sk_buff_head active; + struct igb_lro_stats stats; +}; + +#endif /* IGB_NO_LRO */ +struct igb_cb { +#ifndef IGB_NO_LRO +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + union { /* Union defining head/tail partner */ + struct sk_buff *head; + struct sk_buff *tail; + }; +#endif + __be32 tsecr; /* timestamp echo response */ + u32 tsval; /* timestamp value in host order */ + u32 next_seq; /* next expected sequence number */ + u16 free; /* 65521 minus total size */ + u16 mss; /* size of data portion of packet */ + u16 append_cnt; /* number of skb's appended */ +#endif /* IGB_NO_LRO */ +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid; /* VLAN tag */ +#endif +}; +#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb) + +enum igb_tx_flags { + /* cmd_type flags */ + IGB_TX_FLAGS_VLAN = 0x01, + IGB_TX_FLAGS_TSO = 0x02, + IGB_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + IGB_TX_FLAGS_IPV4 = 0x10, + IGB_TX_FLAGS_CSUM = 0x20, +}; + +/* VLAN info */ +#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IGB_TX_FLAGS_VLAN_SHIFT 16 + +/* + * The largest size we can write to the descriptor is 65535. In order to + * maintain a power of two alignment we have to limit ourselves to 32K. + */ +#define IGB_MAX_TXD_PWR 15 +#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) +#ifndef MAX_SKB_FRAGS +#define DESC_NEEDED 4 +#elif (MAX_SKB_FRAGS < 16) +#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) +#else +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) +#endif + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer */ +struct igb_tx_buffer { + union e1000_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct igb_rx_buffer { + dma_addr_t dma; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + struct sk_buff *skb; +#else + struct page *page; + u32 page_offset; +#endif +}; + +struct igb_tx_queue_stats { + u64 packets; + u64 bytes; + u64 restart_queue; +}; + +struct igb_rx_queue_stats { + u64 packets; + u64 bytes; + u64 drops; + u64 csum_err; + u64 alloc_failed; + u64 ipv4_packets; /* IPv4 headers processed */ + u64 ipv4e_packets; /* IPv4E headers with extensions processed */ + u64 ipv6_packets; /* IPv6 headers processed */ + u64 ipv6e_packets; /* IPv6E headers with extensions processed */ + u64 tcp_packets; /* TCP headers processed */ + u64 udp_packets; /* UDP headers processed */ + u64 sctp_packets; /* SCTP headers processed */ + u64 nfs_packets; /* NFS headers processe */ +}; + +struct igb_ring_container { + struct igb_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +struct igb_ring { + struct igb_q_vector *q_vector; /* backlink to q_vector */ + struct net_device *netdev; /* back pointer to net_device */ + struct device *dev; /* device for dma mapping */ + union { /* array of buffer info structs */ + struct igb_tx_buffer *tx_buffer_info; + struct igb_rx_buffer *rx_buffer_info; + }; +#ifdef HAVE_PTP_1588_CLOCK + unsigned long last_rx_timestamp; +#endif /* HAVE_PTP_1588_CLOCK */ + void *desc; /* descriptor ring memory */ + unsigned long flags; /* ring specific flags */ + void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ + + u16 count; /* number of desc. in the ring */ + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + + /* everything past this point are written often */ + u16 next_to_clean; + u16 next_to_use; + u16 next_to_alloc; + + union { + /* TX */ + struct { + struct igb_tx_queue_stats tx_stats; + }; + /* RX */ + struct { + struct igb_rx_queue_stats rx_stats; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + u16 rx_buffer_len; +#else + struct sk_buff *skb; +#endif + }; + }; +#ifdef CONFIG_IGB_VMDQ_NETDEV + struct net_device *vmdq_netdev; + int vqueue_index; /* queue index for virtual netdev */ +#endif +} ____cacheline_internodealigned_in_smp; + +struct igb_q_vector { + struct igb_adapter *adapter; /* backlink */ + int cpu; /* CPU for DCA */ + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + void __iomem *itr_register; + + struct igb_ring_container rx, tx; + + struct napi_struct napi; +#ifndef IGB_NO_LRO + struct igb_lro_list lrolist; /* LRO list for queue vector*/ +#endif + char name[IFNAMSIZ + 9]; +#ifndef HAVE_NETDEV_NAPI_LIST + struct net_device poll_dev; +#endif + + /* for dynamic allocation of rings associated with this q_vector */ + struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +enum e1000_ring_flags_t { +#ifndef HAVE_NDO_SET_FEATURES + IGB_RING_FLAG_RX_CSUM, +#endif + IGB_RING_FLAG_RX_SCTP_CSUM, + IGB_RING_FLAG_RX_LB_VLAN_BSWAP, + IGB_RING_FLAG_TX_CTX_IDX, + IGB_RING_FLAG_TX_DETECT_HANG, +}; + +struct igb_mac_addr { + u8 addr[ETH_ALEN]; + u16 queue; + u16 state; /* bitmask */ +}; +#define IGB_MAC_STATE_DEFAULT 0x1 +#define IGB_MAC_STATE_MODIFIED 0x2 +#define IGB_MAC_STATE_IN_USE 0x4 + +#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) + +#define IGB_RX_DESC(R, i) \ + (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) +#define IGB_TX_DESC(R, i) \ + (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) +#define IGB_TX_CTXTDESC(R, i) \ + (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) + +#ifdef CONFIG_IGB_VMDQ_NETDEV +#define netdev_ring(ring) \ + ((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev)) +#define ring_queue_index(ring) \ + ((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index)) +#else +#define netdev_ring(ring) (ring->netdev) +#define ring_queue_index(ring) (ring->queue_index) +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + +/* igb_test_staterr - tests bits within Rx descriptor status and error fields */ +static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +/* igb_desc_unused - calculate if we have unused descriptors */ +static inline u16 igb_desc_unused(const struct igb_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; +} + +#ifdef CONFIG_BQL +static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring) +{ + return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); +} +#endif /* CONFIG_BQL */ + +// #ifdef EXT_THERMAL_SENSOR_SUPPORT +// #ifdef IGB_PROCFS +struct igb_therm_proc_data +{ + struct e1000_hw *hw; + struct e1000_thermal_diode_data *sensor_data; +}; + +// #endif /* IGB_PROCFS */ +// #endif /* EXT_THERMAL_SENSOR_SUPPORT */ + +#ifdef IGB_HWMON +#define IGB_HWMON_TYPE_LOC 0 +#define IGB_HWMON_TYPE_TEMP 1 +#define IGB_HWMON_TYPE_CAUTION 2 +#define IGB_HWMON_TYPE_MAX 3 + +struct hwmon_attr { + struct device_attribute dev_attr; + struct e1000_hw *hw; + struct e1000_thermal_diode_data *sensor; + char name[12]; + }; + +struct hwmon_buff { + struct device *device; + struct hwmon_attr *hwmon_list; + unsigned int n_hwmon; + }; +#endif /* IGB_HWMON */ + +/* board specific private data structure */ +struct igb_adapter { +#ifdef HAVE_VLAN_RX_REGISTER + /* vlgrp must be first member of structure */ + struct vlan_group *vlgrp; +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif + struct net_device *netdev; + + unsigned long state; + unsigned int flags; + + unsigned int num_q_vectors; + struct msix_entry *msix_entries; + + + /* TX */ + u16 tx_work_limit; + u32 tx_timeout_count; + int num_tx_queues; + struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES]; + + /* RX */ + int num_rx_queues; + struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES]; + + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; + u16 mng_vlan_id; + u32 bd_number; + u32 wol; + u32 en_mng_pt; + u16 link_speed; + u16 link_duplex; + u8 port_num; + + /* Interrupt Throttle Rate */ + u32 rx_itr_setting; + u32 tx_itr_setting; + + struct work_struct reset_task; + struct work_struct watchdog_task; + struct work_struct dma_err_task; + bool fc_autoneg; + u8 tx_timeout_factor; + +#ifdef DEBUG + bool tx_hang_detected; + bool disable_hw_reset; +#endif + u32 max_frame_size; + + /* OS defined structs */ + struct pci_dev *pdev; +#ifndef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats net_stats; +#endif +#ifndef IGB_NO_LRO + struct igb_lro_stats lro_stats; +#endif + + /* structs defined in e1000_hw.h */ + struct e1000_hw hw; + struct e1000_hw_stats stats; + struct e1000_phy_info phy_info; + struct e1000_phy_stats phy_stats; + +#ifdef ETHTOOL_TEST + u32 test_icr; + struct igb_ring test_tx_ring; + struct igb_ring test_rx_ring; +#endif + + int msg_enable; + + struct igb_q_vector *q_vector[MAX_Q_VECTORS]; + u32 eims_enable_mask; + u32 eims_other; + + /* to not mess up cache alignment, always add to the bottom */ + u32 *config_space; + u16 tx_ring_count; + u16 rx_ring_count; + struct vf_data_storage *vf_data; +#ifdef IFLA_VF_MAX + int vf_rate_link_speed; +#endif + u32 lli_port; + u32 lli_size; + unsigned int vfs_allocated_count; + /* Malicious Driver Detection flag. Valid only when SR-IOV is enabled */ + bool mdd; + int int_mode; + u32 rss_queues; + u32 vmdq_pools; + char fw_version[32]; + u32 wvbr; + struct igb_mac_addr *mac_table; +#ifdef CONFIG_IGB_VMDQ_NETDEV + struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES]; +#endif + int vferr_refcount; + int dmac; + u32 *shadow_vfta; + + /* External Thermal Sensor support flag */ + bool ets; +#ifdef IGB_HWMON + struct hwmon_buff igb_hwmon_buff; +#else /* IGB_HWMON */ +#ifdef IGB_PROCFS + struct proc_dir_entry *eth_dir; + struct proc_dir_entry *info_dir; + struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS]; + struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS]; + bool old_lsc; +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + u32 etrack_id; + +#ifdef HAVE_PTP_1588_CLOCK + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_caps; + struct delayed_work ptp_overflow_work; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; + unsigned long ptp_tx_start; + unsigned long last_rx_ptp_check; + spinlock_t tmreg_lock; + struct cyclecounter cc; + struct timecounter tc; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef HAVE_I2C_SUPPORT + struct i2c_algo_bit_data i2c_algo; + struct i2c_adapter i2c_adap; + struct i2c_client *i2c_client; +#endif /* HAVE_I2C_SUPPORT */ + unsigned long link_check_timeout; + + + int devrc; + + int copper_tries; + u16 eee_advert; +}; + +#ifdef CONFIG_IGB_VMDQ_NETDEV +struct igb_vmdq_adapter { +#ifdef HAVE_VLAN_RX_REGISTER + /* vlgrp must be first member of structure */ + struct vlan_group *vlgrp; +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif + struct igb_adapter *real_adapter; + struct net_device *vnetdev; + struct net_device_stats net_stats; + struct igb_ring *tx_ring; + struct igb_ring *rx_ring; +}; +#endif + +#define IGB_FLAG_HAS_MSI (1 << 0) +#define IGB_FLAG_DCA_ENABLED (1 << 1) +#define IGB_FLAG_LLI_PUSH (1 << 2) +#define IGB_FLAG_QUAD_PORT_A (1 << 3) +#define IGB_FLAG_QUEUE_PAIRS (1 << 4) +#define IGB_FLAG_EEE (1 << 5) +#define IGB_FLAG_DMAC (1 << 6) +#define IGB_FLAG_DETECT_BAD_DMA (1 << 7) +#define IGB_FLAG_PTP (1 << 8) +#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 9) +#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 10) +#define IGB_FLAG_WOL_SUPPORTED (1 << 11) +#define IGB_FLAG_NEED_LINK_UPDATE (1 << 12) +#define IGB_FLAG_LOOPBACK_ENABLE (1 << 13) +#define IGB_FLAG_MEDIA_RESET (1 << 14) +#define IGB_FLAG_MAS_ENABLE (1 << 15) + +/* Media Auto Sense */ +#define IGB_MAS_ENABLE_0 0X0001 +#define IGB_MAS_ENABLE_1 0X0002 +#define IGB_MAS_ENABLE_2 0X0004 +#define IGB_MAS_ENABLE_3 0X0008 + +#define IGB_MIN_TXPBSIZE 20408 +#define IGB_TX_BUF_4096 4096 + +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ + +/* DMA Coalescing defines */ +#define IGB_DMAC_DISABLE 0 +#define IGB_DMAC_MIN 250 +#define IGB_DMAC_500 500 +#define IGB_DMAC_EN_DEFAULT 1000 +#define IGB_DMAC_2000 2000 +#define IGB_DMAC_3000 3000 +#define IGB_DMAC_4000 4000 +#define IGB_DMAC_5000 5000 +#define IGB_DMAC_6000 6000 +#define IGB_DMAC_7000 7000 +#define IGB_DMAC_8000 8000 +#define IGB_DMAC_9000 9000 +#define IGB_DMAC_MAX 10000 + +#define IGB_82576_TSYNC_SHIFT 19 +#define IGB_82580_TSYNC_SHIFT 24 +#define IGB_TS_HDR_LEN 16 + +/* CEM Support */ +#define FW_HDR_LEN 0x4 +#define FW_CMD_DRV_INFO 0xDD +#define FW_CMD_DRV_INFO_LEN 0x5 +#define FW_CMD_RESERVED 0X0 +#define FW_RESP_SUCCESS 0x1 +#define FW_UNUSED_VER 0x0 +#define FW_MAX_RETRIES 3 +#define FW_STATUS_SUCCESS 0x1 +#define FW_FAMILY_DRV_VER 0Xffffffff + +#define IGB_MAX_LINK_TRIES 20 + +struct e1000_fw_hdr { + u8 cmd; + u8 buf_len; + union + { + u8 cmd_resv; + u8 ret_status; + } cmd_or_resp; + u8 checksum; +}; + +#pragma pack(push,1) +struct e1000_fw_drv_info { + struct e1000_fw_hdr hdr; + u8 port_num; + u32 drv_version; + u16 pad; /* end spacing to ensure length is mult. of dword */ + u8 pad2; /* end spacing to ensure length is mult. of dword2 */ +}; +#pragma pack(pop) + +enum e1000_state_t { + __IGB_TESTING, + __IGB_RESETTING, + __IGB_DOWN +}; + +extern char igb_driver_name[]; +extern char igb_driver_version[]; + +extern int igb_up(struct igb_adapter *); +extern void igb_down(struct igb_adapter *); +extern void igb_reinit_locked(struct igb_adapter *); +extern void igb_reset(struct igb_adapter *); +extern int igb_set_spd_dplx(struct igb_adapter *, u16); +extern int igb_setup_tx_resources(struct igb_ring *); +extern int igb_setup_rx_resources(struct igb_ring *); +extern void igb_free_tx_resources(struct igb_ring *); +extern void igb_free_rx_resources(struct igb_ring *); +extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); +extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); +extern void igb_setup_tctl(struct igb_adapter *); +extern void igb_setup_rctl(struct igb_adapter *); +extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); +extern void igb_unmap_and_free_tx_resource(struct igb_ring *, + struct igb_tx_buffer *); +extern void igb_alloc_rx_buffers(struct igb_ring *, u16); +extern void igb_clean_rx_ring(struct igb_ring *); +extern void igb_update_stats(struct igb_adapter *); +extern bool igb_has_link(struct igb_adapter *adapter); +extern void igb_set_ethtool_ops(struct net_device *); +extern void igb_check_options(struct igb_adapter *); +extern void igb_power_up_link(struct igb_adapter *); +#ifdef HAVE_PTP_1588_CLOCK +extern void igb_ptp_init(struct igb_adapter *adapter); +extern void igb_ptp_stop(struct igb_adapter *adapter); +extern void igb_ptp_reset(struct igb_adapter *adapter); +extern void igb_ptp_tx_work(struct work_struct *work); +extern void igb_ptp_rx_hang(struct igb_adapter *adapter); +extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); +extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, + struct sk_buff *skb); +extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb); +static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); + skb_pull(skb, IGB_TS_HDR_LEN); +#endif + return; + } + + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS)) + igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + +extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd); +#endif /* HAVE_PTP_1588_CLOCK */ +#ifdef ETHTOOL_OPS_COMPAT +extern int ethtool_ioctl(struct ifreq *); +#endif +extern int igb_write_mc_addr_list(struct net_device *netdev); +extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue); +extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue); +extern int igb_available_rars(struct igb_adapter *adapter); +extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32); +extern void igb_configure_vt_default_pool(struct igb_adapter *adapter); +extern void igb_enable_vlan_tags(struct igb_adapter *adapter); +#ifndef HAVE_VLAN_RX_REGISTER +extern void igb_vlan_mode(struct net_device *, u32); +#endif + +#define E1000_PCS_CFG_IGN_SD 1 + +#ifdef IGB_HWMON +void igb_sysfs_exit(struct igb_adapter *adapter); +int igb_sysfs_init(struct igb_adapter *adapter); +#else +#ifdef IGB_PROCFS +int igb_procfs_init(struct igb_adapter* adapter); +void igb_procfs_exit(struct igb_adapter* adapter); +int igb_procfs_topdir_init(void); +void igb_procfs_topdir_exit(void); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + + + +#endif /* _IGB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c new file mode 100644 index 00000000..c07f9f53 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c @@ -0,0 +1,28 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c new file mode 100644 index 00000000..af7e68a5 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -0,0 +1,2857 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool support for igb */ + +#include +#include + +#ifdef SIOCETHTOOL +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif /* CONFIG_PM_RUNTIME */ +#include + +#include "igb.h" +#include "igb_regtest.h" +#include +#ifdef ETHTOOL_GEEE +#include +#endif + +#ifdef ETHTOOL_OPS_COMPAT +#include "kcompat_ethtool.c" +#endif +#ifdef ETHTOOL_GSTATS +struct igb_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define IGB_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \ + .stat_offset = offsetof(struct igb_adapter, _stat) \ +} +static const struct igb_stats igb_gstrings_stats[] = { + IGB_STAT("rx_packets", stats.gprc), + IGB_STAT("tx_packets", stats.gptc), + IGB_STAT("rx_bytes", stats.gorc), + IGB_STAT("tx_bytes", stats.gotc), + IGB_STAT("rx_broadcast", stats.bprc), + IGB_STAT("tx_broadcast", stats.bptc), + IGB_STAT("rx_multicast", stats.mprc), + IGB_STAT("tx_multicast", stats.mptc), + IGB_STAT("multicast", stats.mprc), + IGB_STAT("collisions", stats.colc), + IGB_STAT("rx_crc_errors", stats.crcerrs), + IGB_STAT("rx_no_buffer_count", stats.rnbc), + IGB_STAT("rx_missed_errors", stats.mpc), + IGB_STAT("tx_aborted_errors", stats.ecol), + IGB_STAT("tx_carrier_errors", stats.tncrs), + IGB_STAT("tx_window_errors", stats.latecol), + IGB_STAT("tx_abort_late_coll", stats.latecol), + IGB_STAT("tx_deferred_ok", stats.dc), + IGB_STAT("tx_single_coll_ok", stats.scc), + IGB_STAT("tx_multi_coll_ok", stats.mcc), + IGB_STAT("tx_timeout_count", tx_timeout_count), + IGB_STAT("rx_long_length_errors", stats.roc), + IGB_STAT("rx_short_length_errors", stats.ruc), + IGB_STAT("rx_align_errors", stats.algnerrc), + IGB_STAT("tx_tcp_seg_good", stats.tsctc), + IGB_STAT("tx_tcp_seg_failed", stats.tsctfc), + IGB_STAT("rx_flow_control_xon", stats.xonrxc), + IGB_STAT("rx_flow_control_xoff", stats.xoffrxc), + IGB_STAT("tx_flow_control_xon", stats.xontxc), + IGB_STAT("tx_flow_control_xoff", stats.xofftxc), + IGB_STAT("rx_long_byte_count", stats.gorc), + IGB_STAT("tx_dma_out_of_sync", stats.doosync), +#ifndef IGB_NO_LRO + IGB_STAT("lro_aggregated", lro_stats.coal), + IGB_STAT("lro_flushed", lro_stats.flushed), +#endif /* IGB_LRO */ + IGB_STAT("tx_smbus", stats.mgptc), + IGB_STAT("rx_smbus", stats.mgprc), + IGB_STAT("dropped_smbus", stats.mgpdc), + IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + IGB_STAT("os2bmc_tx_by_host", stats.o2bspc), + IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc), +#ifdef HAVE_PTP_1588_CLOCK + IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), + IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), +#endif /* HAVE_PTP_1588_CLOCK */ +}; + +#define IGB_NETDEV_STAT(_net_stat) { \ + .stat_string = #_net_stat, \ + .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ + .stat_offset = offsetof(struct net_device_stats, _net_stat) \ +} +static const struct igb_stats igb_gstrings_net_stats[] = { + IGB_NETDEV_STAT(rx_errors), + IGB_NETDEV_STAT(tx_errors), + IGB_NETDEV_STAT(tx_dropped), + IGB_NETDEV_STAT(rx_length_errors), + IGB_NETDEV_STAT(rx_over_errors), + IGB_NETDEV_STAT(rx_frame_errors), + IGB_NETDEV_STAT(rx_fifo_errors), + IGB_NETDEV_STAT(tx_fifo_errors), + IGB_NETDEV_STAT(tx_heartbeat_errors) +}; + +#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats) +#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats) +#define IGB_RX_QUEUE_STATS_LEN \ + (sizeof(struct igb_rx_queue_stats) / sizeof(u64)) +#define IGB_TX_QUEUE_STATS_LEN \ + (sizeof(struct igb_tx_queue_stats) / sizeof(u64)) +#define IGB_QUEUE_STATS_LEN \ + ((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \ + IGB_RX_QUEUE_STATS_LEN) + \ + (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \ + IGB_TX_QUEUE_STATS_LEN)) +#define IGB_STATS_LEN \ + (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN) + +#endif /* ETHTOOL_GSTATS */ +#ifdef ETHTOOL_TEST +static const char igb_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", "Eeprom test (offline)", + "Interrupt test (offline)", "Loopback test (offline)", + "Link test (on/offline)" +}; +#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN) +#endif /* ETHTOOL_TEST */ + +static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 status; + + if (hw->phy.media_type == e1000_media_type_copper) { + + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Full| + SUPPORTED_Autoneg | + SUPPORTED_TP | + SUPPORTED_Pause); + ecmd->advertising = ADVERTISED_TP; + + if (hw->mac.autoneg == 1) { + ecmd->advertising |= ADVERTISED_Autoneg; + /* the e1000 autoneg seems to match ethtool nicely */ + ecmd->advertising |= hw->phy.autoneg_advertised; + } + + ecmd->port = PORT_TP; + ecmd->phy_address = hw->phy.addr; + ecmd->transceiver = XCVR_INTERNAL; + + } else { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full | + SUPPORTED_FIBRE | + SUPPORTED_Autoneg | + SUPPORTED_Pause); + if (hw->mac.type == e1000_i354) + ecmd->supported |= (SUPPORTED_2500baseX_Full); + + ecmd->advertising = ADVERTISED_FIBRE; + + switch (adapter->link_speed) { + case SPEED_2500: + ecmd->advertising = ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + ecmd->advertising = ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + ecmd->advertising = ADVERTISED_100baseT_Full; + break; + default: + break; + } + + if (hw->mac.autoneg == 1) + ecmd->advertising |= ADVERTISED_Autoneg; + + ecmd->port = PORT_FIBRE; + ecmd->transceiver = XCVR_EXTERNAL; + } + + if (hw->mac.autoneg != 1) + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + if (hw->fc.requested_mode == e1000_fc_full) + ecmd->advertising |= ADVERTISED_Pause; + else if (hw->fc.requested_mode == e1000_fc_rx_pause) + ecmd->advertising |= (ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + else if (hw->fc.requested_mode == e1000_fc_tx_pause) + ecmd->advertising |= ADVERTISED_Asym_Pause; + else + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + status = E1000_READ_REG(hw, E1000_STATUS); + + if (status & E1000_STATUS_LU) { + if ((hw->mac.type == e1000_i354) && + (status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) + ecmd->speed = SPEED_2500; + else if (status & E1000_STATUS_SPEED_1000) + ecmd->speed = SPEED_1000; + else if (status & E1000_STATUS_SPEED_100) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + + if ((status & E1000_STATUS_FD) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->duplex = DUPLEX_FULL; + else + ecmd->duplex = DUPLEX_HALF; + + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + if ((hw->phy.media_type == e1000_media_type_fiber) || + hw->mac.autoneg) + ecmd->autoneg = AUTONEG_ENABLE; + else + ecmd->autoneg = AUTONEG_DISABLE; +#ifdef ETH_TP_MDI_X + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == e1000_media_type_copper) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + +#ifdef ETH_TP_MDI_AUTO + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + +#endif +#endif /* ETH_TP_MDI_X */ + return 0; +} + +static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + if (ecmd->duplex == DUPLEX_HALF) { + if (!hw->dev_spec._82575.eee_disable) + dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n"); + hw->dev_spec._82575.eee_disable = true; + } else { + if (hw->dev_spec._82575.eee_disable) + dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n"); + hw->dev_spec._82575.eee_disable = false; + } + + /* When SoL/IDER sessions are active, autoneg/speed/duplex + * cannot be changed */ + if (e1000_check_reset_block(hw)) { + dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link " + "characteristics when SoL/IDER is active.\n"); + return -EINVAL; + } + +#ifdef ETH_TP_MDI_AUTO + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + +#endif /* ETH_TP_MDI_AUTO */ + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (ecmd->autoneg == AUTONEG_ENABLE) { + hw->mac.autoneg = 1; + if (hw->phy.media_type == e1000_media_type_fiber) { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_FIBRE | + ADVERTISED_Autoneg; + switch (adapter->link_speed) { + case SPEED_2500: + hw->phy.autoneg_advertised = + ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + hw->phy.autoneg_advertised = + ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + hw->phy.autoneg_advertised = + ADVERTISED_100baseT_Full; + break; + default: + break; + } + } else { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_TP | + ADVERTISED_Autoneg; + } + ecmd->advertising = hw->phy.autoneg_advertised; + if (adapter->fc_autoneg) + hw->fc.requested_mode = e1000_fc_default; + } else { + if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) { + clear_bit(__IGB_RESETTING, &adapter->state); + return -EINVAL; + } + } + +#ifdef ETH_TP_MDI_AUTO + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + +#endif /* ETH_TP_MDI_AUTO */ + /* reset the link */ + if (netif_running(adapter->netdev)) { + igb_down(adapter); + igb_up(adapter); + } else + igb_reset(adapter); + + clear_bit(__IGB_RESETTING, &adapter->state); + return 0; +} + +static u32 igb_get_link(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_mac_info *mac = &adapter->hw.mac; + + /* + * If the link is not reported up to netdev, interrupts are disabled, + * and so the physical link state may have changed since we last + * looked. Set get_link_status to make sure that the true link + * state is interrogated, rather than pulling a cached and possibly + * stale link state from the driver. + */ + if (!netif_carrier_ok(netdev)) + mac->get_link_status = 1; + + return igb_has_link(adapter); +} + +static void igb_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + pause->autoneg = + (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); + + if (hw->fc.current_mode == e1000_fc_rx_pause) + pause->rx_pause = 1; + else if (hw->fc.current_mode == e1000_fc_tx_pause) + pause->tx_pause = 1; + else if (hw->fc.current_mode == e1000_fc_full) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +static int igb_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + int retval = 0; + + adapter->fc_autoneg = pause->autoneg; + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (adapter->fc_autoneg == AUTONEG_ENABLE) { + hw->fc.requested_mode = e1000_fc_default; + if (netif_running(adapter->netdev)) { + igb_down(adapter); + igb_up(adapter); + } else { + igb_reset(adapter); + } + } else { + if (pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = e1000_fc_full; + else if (pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = e1000_fc_rx_pause; + else if (!pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = e1000_fc_tx_pause; + else if (!pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = e1000_fc_none; + + hw->fc.current_mode = hw->fc.requested_mode; + + if (hw->phy.media_type == e1000_media_type_fiber) { + retval = hw->mac.ops.setup_link(hw); + /* implicit goto out */ + } else { + retval = e1000_force_mac_fc(hw); + if (retval) + goto out; + e1000_set_fc_watermarks_generic(hw); + } + } + +out: + clear_bit(__IGB_RESETTING, &adapter->state); + return retval; +} + +static u32 igb_get_msglevel(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + return adapter->msg_enable; +} + +static void igb_set_msglevel(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + adapter->msg_enable = data; +} + +static int igb_get_regs_len(struct net_device *netdev) +{ +#define IGB_REGS_LEN 555 + return IGB_REGS_LEN * sizeof(u32); +} + +static void igb_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 *regs_buff = p; + u8 i; + + memset(p, 0, IGB_REGS_LEN * sizeof(u32)); + + regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; + + /* General Registers */ + regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); + regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); + regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); + regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC); + regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL); + regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW); + regs_buff[6] = E1000_READ_REG(hw, E1000_VET); + regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL); + regs_buff[8] = E1000_READ_REG(hw, E1000_PBA); + regs_buff[9] = E1000_READ_REG(hw, E1000_PBS); + regs_buff[10] = E1000_READ_REG(hw, E1000_FRTIMER); + regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER); + + /* NVM Register */ + regs_buff[12] = E1000_READ_REG(hw, E1000_EECD); + + /* Interrupt */ + /* Reading EICS for EICR because they read the + * same but EICS does not clear on read */ + regs_buff[13] = E1000_READ_REG(hw, E1000_EICS); + regs_buff[14] = E1000_READ_REG(hw, E1000_EICS); + regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS); + regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC); + regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC); + regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM); + /* Reading ICS for ICR because they read the + * same but ICS does not clear on read */ + regs_buff[19] = E1000_READ_REG(hw, E1000_ICS); + regs_buff[20] = E1000_READ_REG(hw, E1000_ICS); + regs_buff[21] = E1000_READ_REG(hw, E1000_IMS); + regs_buff[22] = E1000_READ_REG(hw, E1000_IMC); + regs_buff[23] = E1000_READ_REG(hw, E1000_IAC); + regs_buff[24] = E1000_READ_REG(hw, E1000_IAM); + regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP); + + /* Flow Control */ + regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL); + regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH); + regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV); + regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL); + regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH); + regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV); + + /* Receive */ + regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL); + regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM); + regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML); + regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL); + regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC); + regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL); + + /* Transmit */ + regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL); + regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT); + regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG); + regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL); + + /* Wake Up */ + regs_buff[42] = E1000_READ_REG(hw, E1000_WUC); + regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC); + regs_buff[44] = E1000_READ_REG(hw, E1000_WUS); + regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV); + regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL); + + /* MAC */ + regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0); + regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL); + regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT); + regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV); + regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB); + regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX); + regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP); + + /* Statistics */ + regs_buff[54] = adapter->stats.crcerrs; + regs_buff[55] = adapter->stats.algnerrc; + regs_buff[56] = adapter->stats.symerrs; + regs_buff[57] = adapter->stats.rxerrc; + regs_buff[58] = adapter->stats.mpc; + regs_buff[59] = adapter->stats.scc; + regs_buff[60] = adapter->stats.ecol; + regs_buff[61] = adapter->stats.mcc; + regs_buff[62] = adapter->stats.latecol; + regs_buff[63] = adapter->stats.colc; + regs_buff[64] = adapter->stats.dc; + regs_buff[65] = adapter->stats.tncrs; + regs_buff[66] = adapter->stats.sec; + regs_buff[67] = adapter->stats.htdpmc; + regs_buff[68] = adapter->stats.rlec; + regs_buff[69] = adapter->stats.xonrxc; + regs_buff[70] = adapter->stats.xontxc; + regs_buff[71] = adapter->stats.xoffrxc; + regs_buff[72] = adapter->stats.xofftxc; + regs_buff[73] = adapter->stats.fcruc; + regs_buff[74] = adapter->stats.prc64; + regs_buff[75] = adapter->stats.prc127; + regs_buff[76] = adapter->stats.prc255; + regs_buff[77] = adapter->stats.prc511; + regs_buff[78] = adapter->stats.prc1023; + regs_buff[79] = adapter->stats.prc1522; + regs_buff[80] = adapter->stats.gprc; + regs_buff[81] = adapter->stats.bprc; + regs_buff[82] = adapter->stats.mprc; + regs_buff[83] = adapter->stats.gptc; + regs_buff[84] = adapter->stats.gorc; + regs_buff[86] = adapter->stats.gotc; + regs_buff[88] = adapter->stats.rnbc; + regs_buff[89] = adapter->stats.ruc; + regs_buff[90] = adapter->stats.rfc; + regs_buff[91] = adapter->stats.roc; + regs_buff[92] = adapter->stats.rjc; + regs_buff[93] = adapter->stats.mgprc; + regs_buff[94] = adapter->stats.mgpdc; + regs_buff[95] = adapter->stats.mgptc; + regs_buff[96] = adapter->stats.tor; + regs_buff[98] = adapter->stats.tot; + regs_buff[100] = adapter->stats.tpr; + regs_buff[101] = adapter->stats.tpt; + regs_buff[102] = adapter->stats.ptc64; + regs_buff[103] = adapter->stats.ptc127; + regs_buff[104] = adapter->stats.ptc255; + regs_buff[105] = adapter->stats.ptc511; + regs_buff[106] = adapter->stats.ptc1023; + regs_buff[107] = adapter->stats.ptc1522; + regs_buff[108] = adapter->stats.mptc; + regs_buff[109] = adapter->stats.bptc; + regs_buff[110] = adapter->stats.tsctc; + regs_buff[111] = adapter->stats.iac; + regs_buff[112] = adapter->stats.rpthc; + regs_buff[113] = adapter->stats.hgptc; + regs_buff[114] = adapter->stats.hgorc; + regs_buff[116] = adapter->stats.hgotc; + regs_buff[118] = adapter->stats.lenerrs; + regs_buff[119] = adapter->stats.scvpc; + regs_buff[120] = adapter->stats.hrmpc; + + for (i = 0; i < 4; i++) + regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i)); + for (i = 0; i < 4; i++) + regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i)); + for (i = 0; i < 4; i++) + regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i)); + for (i = 0; i < 4; i++) + regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i)); + for (i = 0; i < 4; i++) + regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); + + for (i = 0; i < 10; i++) + regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i)); + for (i = 0; i < 8; i++) + regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i)); + for (i = 0; i < 8; i++) + regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i)); + for (i = 0; i < 16; i++) + regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i)); + for (i = 0; i < 16; i++) + regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i)); + + for (i = 0; i < 4; i++) + regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i)); + for (i = 0; i < 4; i++) + regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i)); + for (i = 0; i < 4; i++) + regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i)); + for (i = 0; i < 4; i++) + regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); + + for (i = 0; i < 4; i++) + regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i)); + for (i = 0; i < 4; i++) + regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i)); + for (i = 0; i < 32; i++) + regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i)); + for (i = 0; i < 128; i++) + regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i)); + for (i = 0; i < 128; i++) + regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i)); + for (i = 0; i < 4; i++) + regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i)); + + regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH); + regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT); + regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS); + regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC); + if (hw->mac.type > e1000_82580) { + regs_buff[551] = adapter->stats.o2bgptc; + regs_buff[552] = adapter->stats.b2ospc; + regs_buff[553] = adapter->stats.o2bspc; + regs_buff[554] = adapter->stats.b2ogprc; + } +} + +static int igb_get_eeprom_len(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + return adapter->hw.nvm.word_size * 2; +} + +static int igb_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u16 *eeprom_buff; + int first_word, last_word; + int ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + + eeprom_buff = kmalloc(sizeof(u16) * + (last_word - first_word + 1), GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + if (hw->nvm.type == e1000_nvm_eeprom_spi) + ret_val = e1000_read_nvm(hw, first_word, + last_word - first_word + 1, + eeprom_buff); + else { + for (i = 0; i < last_word - first_word + 1; i++) { + ret_val = e1000_read_nvm(hw, first_word + i, 1, + &eeprom_buff[i]); + if (ret_val) + break; + } + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]); + + memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), + eeprom->len); + kfree(eeprom_buff); + + return ret_val; +} + +static int igb_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u16 *eeprom_buff; + void *ptr; + int max_len, first_word, last_word, ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EOPNOTSUPP; + + if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) + return -EFAULT; + + max_len = hw->nvm.word_size * 2; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(max_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ptr = (void *)eeprom_buff; + + if (eeprom->offset & 1) { + /* need read/modify/write of first changed EEPROM word */ + /* only the second byte of the word is being modified */ + ret_val = e1000_read_nvm(hw, first_word, 1, + &eeprom_buff[0]); + ptr++; + } + if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { + /* need read/modify/write of last changed EEPROM word */ + /* only the first byte of the word is being modified */ + ret_val = e1000_read_nvm(hw, last_word, 1, + &eeprom_buff[last_word - first_word]); + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(ptr, bytes, eeprom->len); + + for (i = 0; i < last_word - first_word + 1; i++) + cpu_to_le16s(&eeprom_buff[i]); + + ret_val = e1000_write_nvm(hw, first_word, + last_word - first_word + 1, eeprom_buff); + + /* Update the checksum if write succeeded. + * and flush shadow RAM for 82573 controllers */ + if (ret_val == 0) + e1000_update_nvm_checksum(hw); + + kfree(eeprom_buff); + return ret_val; +} + +static void igb_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1); + strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1); + + strncpy(drvinfo->fw_version, adapter->fw_version, + sizeof(drvinfo->fw_version) - 1); + strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1); + drvinfo->n_stats = IGB_STATS_LEN; + drvinfo->testinfo_len = IGB_TEST_LEN; + drvinfo->regdump_len = igb_get_regs_len(netdev); + drvinfo->eedump_len = igb_get_eeprom_len(netdev); +} + +static void igb_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IGB_MAX_RXD; + ring->tx_max_pending = IGB_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->rx_ring_count; + ring->tx_pending = adapter->tx_ring_count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int igb_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct igb_ring *temp_ring; + int i, err = 0; + u16 new_rx_count, new_tx_count; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD); + new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD); + new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE); + + new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD); + new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD); + new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE); + + if ((new_tx_count == adapter->tx_ring_count) && + (new_rx_count == adapter->rx_ring_count)) { + /* nothing to do */ + return 0; + } + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (!netif_running(adapter->netdev)) { + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->count = new_rx_count; + adapter->tx_ring_count = new_tx_count; + adapter->rx_ring_count = new_rx_count; + goto clear_reset; + } + + if (adapter->num_tx_queues > adapter->num_rx_queues) + temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring)); + else + temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring)); + + if (!temp_ring) { + err = -ENOMEM; + goto clear_reset; + } + + igb_down(adapter); + + /* + * We can't just free everything and then setup again, + * because the ISRs in MSI-X mode get passed pointers + * to the tx and rx ring structs. + */ + if (new_tx_count != adapter->tx_ring_count) { + for (i = 0; i < adapter->num_tx_queues; i++) { + memcpy(&temp_ring[i], adapter->tx_ring[i], + sizeof(struct igb_ring)); + + temp_ring[i].count = new_tx_count; + err = igb_setup_tx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igb_free_tx_resources(&temp_ring[i]); + } + goto err_setup; + } + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + igb_free_tx_resources(adapter->tx_ring[i]); + + memcpy(adapter->tx_ring[i], &temp_ring[i], + sizeof(struct igb_ring)); + } + + adapter->tx_ring_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_ring_count) { + for (i = 0; i < adapter->num_rx_queues; i++) { + memcpy(&temp_ring[i], adapter->rx_ring[i], + sizeof(struct igb_ring)); + + temp_ring[i].count = new_rx_count; + err = igb_setup_rx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igb_free_rx_resources(&temp_ring[i]); + } + goto err_setup; + } + + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + igb_free_rx_resources(adapter->rx_ring[i]); + + memcpy(adapter->rx_ring[i], &temp_ring[i], + sizeof(struct igb_ring)); + } + + adapter->rx_ring_count = new_rx_count; + } +err_setup: + igb_up(adapter); + vfree(temp_ring); +clear_reset: + clear_bit(__IGB_RESETTING, &adapter->state); + return err; +} +static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data, + int reg, u32 mask, u32 write) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pat, val; + static const u32 _test[] = + {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { + E1000_WRITE_REG(hw, reg, (_test[pat] & write)); + val = E1000_READ_REG(hw, reg) & mask; + if (val != (_test[pat] & write & mask)) { + dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X " + "failed: got 0x%08X expected 0x%08X\n", + E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask)); + *data = E1000_REGISTER(hw, reg); + return 1; + } + } + + return 0; +} + +static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data, + int reg, u32 mask, u32 write) +{ + struct e1000_hw *hw = &adapter->hw; + u32 val; + E1000_WRITE_REG(hw, reg, write & mask); + val = E1000_READ_REG(hw, reg); + if ((write & mask) != (val & mask)) { + dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:" + " got 0x%08X expected 0x%08X\n", reg, + (val & mask), (write & mask)); + *data = E1000_REGISTER(hw, reg); + return 1; + } + + return 0; +} + +#define REG_PATTERN_TEST(reg, mask, write) \ + do { \ + if (reg_pattern_test(adapter, data, reg, mask, write)) \ + return 1; \ + } while (0) + +#define REG_SET_AND_CHECK(reg, mask, write) \ + do { \ + if (reg_set_and_check(adapter, data, reg, mask, write)) \ + return 1; \ + } while (0) + +static int igb_reg_test(struct igb_adapter *adapter, u64 *data) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_reg_test *test; + u32 value, before, after; + u32 i, toggle; + + switch (adapter->hw.mac.type) { + case e1000_i350: + case e1000_i354: + test = reg_test_i350; + toggle = 0x7FEFF3FF; + break; + case e1000_i210: + case e1000_i211: + test = reg_test_i210; + toggle = 0x7FEFF3FF; + break; + case e1000_82580: + test = reg_test_82580; + toggle = 0x7FEFF3FF; + break; + case e1000_82576: + test = reg_test_82576; + toggle = 0x7FFFF3FF; + break; + default: + test = reg_test_82575; + toggle = 0x7FFFF3FF; + break; + } + + /* Because the status register is such a special case, + * we handle it separately from the rest of the register + * tests. Some bits are read-only, some toggle, and some + * are writable on newer MACs. + */ + before = E1000_READ_REG(hw, E1000_STATUS); + value = (E1000_READ_REG(hw, E1000_STATUS) & toggle); + E1000_WRITE_REG(hw, E1000_STATUS, toggle); + after = E1000_READ_REG(hw, E1000_STATUS) & toggle; + if (value != after) { + dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test " + "got: 0x%08X expected: 0x%08X\n", after, value); + *data = 1; + return 1; + } + /* restore previous status */ + E1000_WRITE_REG(hw, E1000_STATUS, before); + + /* Perform the remainder of the register test, looping through + * the test table until we either fail or reach the null entry. + */ + while (test->reg) { + for (i = 0; i < test->array_len; i++) { + switch (test->test_type) { + case PATTERN_TEST: + REG_PATTERN_TEST(test->reg + + (i * test->reg_offset), + test->mask, + test->write); + break; + case SET_READ_TEST: + REG_SET_AND_CHECK(test->reg + + (i * test->reg_offset), + test->mask, + test->write); + break; + case WRITE_NO_TEST: + writel(test->write, + (adapter->hw.hw_addr + test->reg) + + (i * test->reg_offset)); + break; + case TABLE32_TEST: + REG_PATTERN_TEST(test->reg + (i * 4), + test->mask, + test->write); + break; + case TABLE64_TEST_LO: + REG_PATTERN_TEST(test->reg + (i * 8), + test->mask, + test->write); + break; + case TABLE64_TEST_HI: + REG_PATTERN_TEST((test->reg + 4) + (i * 8), + test->mask, + test->write); + break; + } + } + test++; + } + + *data = 0; + return 0; +} + +static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data) +{ + *data = 0; + + /* Validate NVM checksum */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) + *data = 2; + + return *data; +} + +static irqreturn_t igb_test_intr(int irq, void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *) data; + struct e1000_hw *hw = &adapter->hw; + + adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR); + + return IRQ_HANDLED; +} + +static int igb_intr_test(struct igb_adapter *adapter, u64 *data) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 mask, ics_mask, i = 0, shared_int = TRUE; + u32 irq = adapter->pdev->irq; + + *data = 0; + + /* Hook up test interrupt handler just for this test */ + if (adapter->msix_entries) { + if (request_irq(adapter->msix_entries[0].vector, + &igb_test_intr, 0, netdev->name, adapter)) { + *data = 1; + return -1; + } + } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + shared_int = FALSE; + if (request_irq(irq, + igb_test_intr, 0, netdev->name, adapter)) { + *data = 1; + return -1; + } + } else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED, + netdev->name, adapter)) { + shared_int = FALSE; + } else if (request_irq(irq, &igb_test_intr, IRQF_SHARED, + netdev->name, adapter)) { + *data = 1; + return -1; + } + dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n", + (shared_int ? "shared" : "unshared")); + + /* Disable all the interrupts */ + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Define all writable bits for ICS */ + switch (hw->mac.type) { + case e1000_82575: + ics_mask = 0x37F47EDD; + break; + case e1000_82576: + ics_mask = 0x77D4FBFD; + break; + case e1000_82580: + ics_mask = 0x77DCFED5; + break; + case e1000_i350: + case e1000_i354: + ics_mask = 0x77DCFED5; + break; + case e1000_i210: + case e1000_i211: + ics_mask = 0x774CFED5; + break; + default: + ics_mask = 0x7FFFFFFF; + break; + } + + /* Test each interrupt */ + for (; i < 31; i++) { + /* Interrupt to test */ + mask = 1 << i; + + if (!(mask & ics_mask)) + continue; + + if (!shared_int) { + /* Disable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMC, mask); + E1000_WRITE_REG(hw, E1000_ICS, mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 3; + break; + } + } + + /* Enable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was not posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMS, mask); + E1000_WRITE_REG(hw, E1000_ICS, mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (!(adapter->test_icr & mask)) { + *data = 4; + break; + } + + if (!shared_int) { + /* Disable the other interrupts to be reported in + * the cause register and then force the other + * interrupts and see if any get posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMC, ~mask); + E1000_WRITE_REG(hw, E1000_ICS, ~mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 5; + break; + } + } + } + + /* Disable all the interrupts */ + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Unhook test interrupt handler */ + if (adapter->msix_entries) + free_irq(adapter->msix_entries[0].vector, adapter); + else + free_irq(irq, adapter); + + return *data; +} + +static void igb_free_desc_rings(struct igb_adapter *adapter) +{ + igb_free_tx_resources(&adapter->test_tx_ring); + igb_free_rx_resources(&adapter->test_rx_ring); +} + +static int igb_setup_desc_rings(struct igb_adapter *adapter) +{ + struct igb_ring *tx_ring = &adapter->test_tx_ring; + struct igb_ring *rx_ring = &adapter->test_rx_ring; + struct e1000_hw *hw = &adapter->hw; + int ret_val; + + /* Setup Tx descriptor ring and Tx buffers */ + tx_ring->count = IGB_DEFAULT_TXD; + tx_ring->dev = pci_dev_to_dev(adapter->pdev); + tx_ring->netdev = adapter->netdev; + tx_ring->reg_idx = adapter->vfs_allocated_count; + + if (igb_setup_tx_resources(tx_ring)) { + ret_val = 1; + goto err_nomem; + } + + igb_setup_tctl(adapter); + igb_configure_tx_ring(adapter, tx_ring); + + /* Setup Rx descriptor ring and Rx buffers */ + rx_ring->count = IGB_DEFAULT_RXD; + rx_ring->dev = pci_dev_to_dev(adapter->pdev); + rx_ring->netdev = adapter->netdev; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + rx_ring->rx_buffer_len = IGB_RX_HDR_LEN; +#endif + rx_ring->reg_idx = adapter->vfs_allocated_count; + + if (igb_setup_rx_resources(rx_ring)) { + ret_val = 2; + goto err_nomem; + } + + /* set the default queue to queue 0 of PF */ + E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3); + + /* enable receive ring */ + igb_setup_rctl(adapter); + igb_configure_rx_ring(adapter, rx_ring); + + igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring)); + + return 0; + +err_nomem: + igb_free_desc_rings(adapter); + return ret_val; +} + +static void igb_phy_disable_receiver(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + /* Write out to PHY registers 29 and 30 to disable the Receiver. */ + e1000_write_phy_reg(hw, 29, 0x001F); + e1000_write_phy_reg(hw, 30, 0x8FFC); + e1000_write_phy_reg(hw, 29, 0x001A); + e1000_write_phy_reg(hw, 30, 0x8FF0); +} + +static int igb_integrated_phy_loopback(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_reg = 0; + + hw->mac.autoneg = FALSE; + + if (hw->phy.type == e1000_phy_m88) { + if (hw->phy.id != I210_I_PHY_ID) { + /* Auto-MDI/MDIX Off */ + e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); + /* reset to update Auto-MDI/MDIX */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140); + /* autoneg off */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140); + } else { + /* force 1000, set loopback */ + e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); + } + } else { + /* enable MII loopback */ + if (hw->phy.type == e1000_phy_82580) + e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041); + } + + /* force 1000, set loopback */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); + + /* Now set up the MAC to the same speed/duplex as the PHY. */ + ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); + ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ + ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ + E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ + E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ + E1000_CTRL_FD | /* Force Duplex to FULL */ + E1000_CTRL_SLU); /* Set link up enable bit */ + + if (hw->phy.type == e1000_phy_m88) + ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); + + /* Disable the receiver on the PHY so when a cable is plugged in, the + * PHY does not begin to autoneg when a cable is reconnected to the NIC. + */ + if (hw->phy.type == e1000_phy_m88) + igb_phy_disable_receiver(adapter); + + mdelay(500); + return 0; +} + +static int igb_set_phy_loopback(struct igb_adapter *adapter) +{ + return igb_integrated_phy_loopback(adapter); +} + +static int igb_setup_loopback_test(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + + /* use CTRL_EXT to identify link type as SGMII can appear as copper */ + if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) { + if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + + /* Enable DH89xxCC MPHY for near end loopback */ + reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); + reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) | + E1000_MPHY_PCS_CLK_REG_OFFSET; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); + + reg = E1000_READ_REG(hw, E1000_MPHY_DATA); + reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); + } + + reg = E1000_READ_REG(hw, E1000_RCTL); + reg |= E1000_RCTL_LBM_TCVR; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + + E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK); + + reg = E1000_READ_REG(hw, E1000_CTRL); + reg &= ~(E1000_CTRL_RFCE | + E1000_CTRL_TFCE | + E1000_CTRL_LRST); + reg |= E1000_CTRL_SLU | + E1000_CTRL_FD; + E1000_WRITE_REG(hw, E1000_CTRL, reg); + + /* Unset switch control to serdes energy detect */ + reg = E1000_READ_REG(hw, E1000_CONNSW); + reg &= ~E1000_CONNSW_ENRGSRC; + E1000_WRITE_REG(hw, E1000_CONNSW, reg); + + /* Unset sigdetect for SERDES loopback on + * 82580 and newer devices + */ + if (hw->mac.type >= e1000_82580) { + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg |= E1000_PCS_CFG_IGN_SD; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + } + + /* Set PCS register for forced speed */ + reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + reg &= ~E1000_PCS_LCTL_AN_ENABLE; /* Disable Autoneg*/ + reg |= E1000_PCS_LCTL_FLV_LINK_UP | /* Force link up */ + E1000_PCS_LCTL_FSV_1000 | /* Force 1000 */ + E1000_PCS_LCTL_FDV_FULL | /* SerDes Full duplex */ + E1000_PCS_LCTL_FSD | /* Force Speed */ + E1000_PCS_LCTL_FORCE_LINK; /* Force Link */ + E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); + + return 0; + } + + return igb_set_phy_loopback(adapter); +} + +static void igb_loopback_cleanup(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rctl; + u16 phy_reg; + + if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + u32 reg; + + /* Disable near end loopback on DH89xxCC */ + reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); + reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK ) | + E1000_MPHY_PCS_CLK_REG_OFFSET; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); + + reg = E1000_READ_REG(hw, E1000_MPHY_DATA); + reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); + } + + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + hw->mac.autoneg = TRUE; + e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg); + if (phy_reg & MII_CR_LOOPBACK) { + phy_reg &= ~MII_CR_LOOPBACK; + if (hw->phy.type == I210_I_PHY_ID) + e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg); + e1000_phy_commit(hw); + } +} +static void igb_create_lbtest_frame(struct sk_buff *skb, + unsigned int frame_size) +{ + memset(skb->data, 0xFF, frame_size); + frame_size /= 2; + memset(&skb->data[frame_size], 0xAA, frame_size - 1); + memset(&skb->data[frame_size + 10], 0xBE, 1); + memset(&skb->data[frame_size + 12], 0xAF, 1); +} + +static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer, + unsigned int frame_size) +{ + unsigned char *data; + bool match = true; + + frame_size >>= 1; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + data = rx_buffer->skb->data; +#else + data = kmap(rx_buffer->page); +#endif + + if (data[3] != 0xFF || + data[frame_size + 10] != 0xBE || + data[frame_size + 12] != 0xAF) + match = false; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + kunmap(rx_buffer->page); + +#endif + return match; +} + +static u16 igb_clean_test_rings(struct igb_ring *rx_ring, + struct igb_ring *tx_ring, + unsigned int size) +{ + union e1000_adv_rx_desc *rx_desc; + struct igb_rx_buffer *rx_buffer_info; + struct igb_tx_buffer *tx_buffer_info; + u16 rx_ntc, tx_ntc, count = 0; + + /* initialize next to clean and descriptor values */ + rx_ntc = rx_ring->next_to_clean; + tx_ntc = tx_ring->next_to_clean; + rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); + + while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { + /* check rx buffer */ + rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; + + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer_info->dma, +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_RX_HDR_LEN, +#else + IGB_RX_BUFSZ, +#endif + DMA_FROM_DEVICE); + + /* verify contents of skb */ + if (igb_check_lbtest_frame(rx_buffer_info, size)) + count++; + + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer_info->dma, +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_RX_HDR_LEN, +#else + IGB_RX_BUFSZ, +#endif + DMA_FROM_DEVICE); + + /* unmap buffer on tx side */ + tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); + + /* increment rx/tx next to clean counters */ + rx_ntc++; + if (rx_ntc == rx_ring->count) + rx_ntc = 0; + tx_ntc++; + if (tx_ntc == tx_ring->count) + tx_ntc = 0; + + /* fetch next descriptor */ + rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); + } + + /* re-map buffers to ring, store next to clean values */ + igb_alloc_rx_buffers(rx_ring, count); + rx_ring->next_to_clean = rx_ntc; + tx_ring->next_to_clean = tx_ntc; + + return count; +} + +static int igb_run_loopback_test(struct igb_adapter *adapter) +{ + struct igb_ring *tx_ring = &adapter->test_tx_ring; + struct igb_ring *rx_ring = &adapter->test_rx_ring; + u16 i, j, lc, good_cnt; + int ret_val = 0; + unsigned int size = IGB_RX_HDR_LEN; + netdev_tx_t tx_ret_val; + struct sk_buff *skb; + + /* allocate test skb */ + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return 11; + + /* place data into test skb */ + igb_create_lbtest_frame(skb, size); + skb_put(skb, size); + + /* + * Calculate the loop count based on the largest descriptor ring + * The idea is to wrap the largest ring a number of times using 64 + * send/receive pairs during each loop + */ + + if (rx_ring->count <= tx_ring->count) + lc = ((tx_ring->count / 64) * 2) + 1; + else + lc = ((rx_ring->count / 64) * 2) + 1; + + for (j = 0; j <= lc; j++) { /* loop count loop */ + /* reset count of good packets */ + good_cnt = 0; + + /* place 64 packets on the transmit queue*/ + for (i = 0; i < 64; i++) { + skb_get(skb); + tx_ret_val = igb_xmit_frame_ring(skb, tx_ring); + if (tx_ret_val == NETDEV_TX_OK) + good_cnt++; + } + + if (good_cnt != 64) { + ret_val = 12; + break; + } + + /* allow 200 milliseconds for packets to go from tx to rx */ + msleep(200); + + good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size); + if (good_cnt != 64) { + ret_val = 13; + break; + } + } /* end loop count loop */ + + /* free the original skb */ + kfree_skb(skb); + + return ret_val; +} + +static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) +{ + /* PHY loopback cannot be performed if SoL/IDER + * sessions are active */ + if (e1000_check_reset_block(&adapter->hw)) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Cannot do PHY loopback test " + "when SoL/IDER is active.\n"); + *data = 0; + goto out; + } + if (adapter->hw.mac.type == e1000_i354) { + dev_info(&adapter->pdev->dev, + "Loopback test not supported on i354.\n"); + *data = 0; + goto out; + } + *data = igb_setup_desc_rings(adapter); + if (*data) + goto out; + *data = igb_setup_loopback_test(adapter); + if (*data) + goto err_loopback; + *data = igb_run_loopback_test(adapter); + + igb_loopback_cleanup(adapter); + +err_loopback: + igb_free_desc_rings(adapter); +out: + return *data; +} + +static int igb_link_test(struct igb_adapter *adapter, u64 *data) +{ + u32 link; + int i, time; + + *data = 0; + time = 0; + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + int i = 0; + adapter->hw.mac.serdes_has_link = FALSE; + + /* On some blade server designs, link establishment + * could take as long as 2-3 minutes */ + do { + e1000_check_for_link(&adapter->hw); + if (adapter->hw.mac.serdes_has_link) + goto out; + msleep(20); + } while (i++ < 3750); + + *data = 1; + } else { + for (i=0; i < IGB_MAX_LINK_TRIES; i++) { + link = igb_has_link(adapter); + if (link) + goto out; + else { + time++; + msleep(1000); + } + } + if (!link) + *data = 1; + } + out: + return *data; +} + +static void igb_diag_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + u16 autoneg_advertised; + u8 forced_speed_duplex, autoneg; + bool if_running = netif_running(netdev); + + set_bit(__IGB_TESTING, &adapter->state); + if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + /* Offline tests */ + + /* save speed, duplex, autoneg settings */ + autoneg_advertised = adapter->hw.phy.autoneg_advertised; + forced_speed_duplex = adapter->hw.mac.forced_speed_duplex; + autoneg = adapter->hw.mac.autoneg; + + dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n"); + + /* power up link for link test */ + igb_power_up_link(adapter); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result */ + if (igb_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (if_running) + /* indicate we're in test mode */ + dev_close(netdev); + else + igb_reset(adapter); + + if (igb_reg_test(adapter, &data[0])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + if (igb_eeprom_test(adapter, &data[1])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + if (igb_intr_test(adapter, &data[2])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + + /* power up link for loopback test */ + igb_power_up_link(adapter); + + if (igb_loopback_test(adapter, &data[3])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* restore speed, duplex, autoneg settings */ + adapter->hw.phy.autoneg_advertised = autoneg_advertised; + adapter->hw.mac.forced_speed_duplex = forced_speed_duplex; + adapter->hw.mac.autoneg = autoneg; + + /* force this routine to wait until autoneg complete/timeout */ + adapter->hw.phy.autoneg_wait_to_complete = TRUE; + igb_reset(adapter); + adapter->hw.phy.autoneg_wait_to_complete = FALSE; + + clear_bit(__IGB_TESTING, &adapter->state); + if (if_running) + dev_open(netdev); + } else { + dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n"); + + /* PHY is powered down when interface is down */ + if (if_running && igb_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + else + data[4] = 0; + + /* Online tests aren't run; pass by default */ + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + + clear_bit(__IGB_TESTING, &adapter->state); + } + msleep_interruptible(4 * 1000); +} + +static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; + wol->wolopts = 0; + + if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) + return; + + /* apply any specific unsupported masks here */ + switch (adapter->hw.device_id) { + default: + break; + } + + if (adapter->wol & E1000_WUFC_EX) + wol->wolopts |= WAKE_UCAST; + if (adapter->wol & E1000_WUFC_MC) + wol->wolopts |= WAKE_MCAST; + if (adapter->wol & E1000_WUFC_BC) + wol->wolopts |= WAKE_BCAST; + if (adapter->wol & E1000_WUFC_MAG) + wol->wolopts |= WAKE_MAGIC; + if (adapter->wol & E1000_WUFC_LNKC) + wol->wolopts |= WAKE_PHY; +} + +static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE)) + return -EOPNOTSUPP; + + if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) + return wol->wolopts ? -EOPNOTSUPP : 0; + + /* these settings will always override what we currently have */ + adapter->wol = 0; + + if (wol->wolopts & WAKE_UCAST) + adapter->wol |= E1000_WUFC_EX; + if (wol->wolopts & WAKE_MCAST) + adapter->wol |= E1000_WUFC_MC; + if (wol->wolopts & WAKE_BCAST) + adapter->wol |= E1000_WUFC_BC; + if (wol->wolopts & WAKE_MAGIC) + adapter->wol |= E1000_WUFC_MAG; + if (wol->wolopts & WAKE_PHY) + adapter->wol |= E1000_WUFC_LNKC; + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + +/* bit defines for adapter->led_status */ +#ifdef HAVE_ETHTOOL_SET_PHYS_ID +static int igb_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + e1000_blink_led(hw); + return 2; + case ETHTOOL_ID_ON: + e1000_led_on(hw); + break; + case ETHTOOL_ID_OFF: + e1000_led_off(hw); + break; + case ETHTOOL_ID_INACTIVE: + e1000_led_off(hw); + e1000_cleanup_led(hw); + break; + } + + return 0; +} +#else +static int igb_phys_id(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + unsigned long timeout; + + timeout = data * 1000; + + /* + * msleep_interruptable only accepts unsigned int so we are limited + * in how long a duration we can wait + */ + if (!timeout || timeout > UINT_MAX) + timeout = UINT_MAX; + + e1000_blink_led(hw); + msleep_interruptible(timeout); + + e1000_led_off(hw); + e1000_cleanup_led(hw); + + return 0; +} +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ + +static int igb_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int i; + + if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) || + ((ec->rx_coalesce_usecs > 3) && + (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) || + (ec->rx_coalesce_usecs == 2)) + { + printk("set_coalesce:invalid parameter.."); + return -EINVAL; + } + + if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) || + ((ec->tx_coalesce_usecs > 3) && + (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) || + (ec->tx_coalesce_usecs == 2)) + return -EINVAL; + + if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + return -EINVAL; + + if (ec->tx_max_coalesced_frames_irq) + adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; + + /* If ITR is disabled, disable DMAC */ + if (ec->rx_coalesce_usecs == 0) { + adapter->dmac = IGB_DMAC_DISABLE; + } + + /* convert to rate of irq's per second */ + if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) + adapter->rx_itr_setting = ec->rx_coalesce_usecs; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + + /* convert to rate of irq's per second */ + if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) + adapter->tx_itr_setting = adapter->rx_itr_setting; + else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3) + adapter->tx_itr_setting = ec->tx_coalesce_usecs; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + q_vector->tx.work_limit = adapter->tx_work_limit; + if (q_vector->rx.ring) + q_vector->itr_val = adapter->rx_itr_setting; + else + q_vector->itr_val = adapter->tx_itr_setting; + if (q_vector->itr_val && q_vector->itr_val <= 3) + q_vector->itr_val = IGB_START_ITR; + q_vector->set_itr = 1; + } + + return 0; +} + +static int igb_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (adapter->rx_itr_setting <= 3) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; + + ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; + + if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) { + if (adapter->tx_itr_setting <= 3) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; + } + + return 0; +} + +static int igb_nway_reset(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (netif_running(netdev)) + igb_reinit_locked(adapter); + return 0; +} + +#ifdef HAVE_ETHTOOL_GET_SSET_COUNT +static int igb_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return IGB_STATS_LEN; + case ETH_SS_TEST: + return IGB_TEST_LEN; + default: + return -ENOTSUPP; + } +} +#else +static int igb_get_stats_count(struct net_device *netdev) +{ + return IGB_STATS_LEN; +} + +static int igb_diag_test_count(struct net_device *netdev) +{ + return IGB_TEST_LEN; +} +#endif + +static void igb_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif + u64 *queue_stat; + int i, j, k; + char *p; + + igb_update_stats(adapter); + + for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { + p = (char *)adapter + igb_gstrings_stats[i].stat_offset; + data[i] = (igb_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) { + p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset; + data[i] = (igb_gstrings_net_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < adapter->num_tx_queues; j++) { + queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats; + for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++) + data[i] = queue_stat[k]; + } + for (j = 0; j < adapter->num_rx_queues; j++) { + queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats; + for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++) + data[i] = queue_stat[k]; + } +} + +static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_TEST: + memcpy(data, *igb_gstrings_test, + IGB_TEST_LEN*ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: + for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { + memcpy(p, igb_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) { + memcpy(p, igb_gstrings_net_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_restart", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_drops", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_csum_err", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_alloc_failed", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv4_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv4e_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv6_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv6e_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_tcp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_udp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_sctp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_nfs_packets", i); + p += ETH_GSTRING_LEN; + } +/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ + break; + } +} + +#ifdef HAVE_ETHTOOL_GET_TS_INFO +static int igb_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (adapter->hw.mac.type) { +#ifdef HAVE_PTP_1588_CLOCK + case e1000_82575: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = 1 << HWTSTAMP_FILTER_NONE; + + /* 82576 does not support timestamping all packets. */ + if (adapter->hw.mac.type >= e1000_82580) + info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL; + else + info->rx_filters |= + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +#endif /* HAVE_PTP_1588_CLOCK */ + default: + return -EOPNOTSUPP; + } +} +#endif /* HAVE_ETHTOOL_GET_TS_INFO */ + +#ifdef CONFIG_PM_RUNTIME +static int igb_ethtool_begin(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + pm_runtime_get_sync(&adapter->pdev->dev); + + return 0; +} + +static void igb_ethtool_complete(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + pm_runtime_put(&adapter->pdev->dev); +} +#endif /* CONFIG_PM_RUNTIME */ + +#ifndef HAVE_NDO_SET_FEATURES +static u32 igb_get_rx_csum(struct net_device *netdev) +{ + return !!(netdev->features & NETIF_F_RXCSUM); +} + +static int igb_set_rx_csum(struct net_device *netdev, u32 data) +{ + const u32 feature_list = NETIF_F_RXCSUM; + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +static int igb_set_tx_csum(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef NETIF_F_IPV6_CSUM + u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; +#else + u32 feature_list = NETIF_F_IP_CSUM; +#endif + + if (adapter->hw.mac.type >= e1000_82576) + feature_list |= NETIF_F_SCTP_CSUM; + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +#ifdef NETIF_F_TSO +static int igb_set_tso(struct net_device *netdev, u32 data) +{ +#ifdef NETIF_F_TSO6 + const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6; +#else + const u32 feature_list = NETIF_F_TSO; +#endif + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + +#ifndef HAVE_NETDEV_VLAN_FEATURES + if (!data) { + struct igb_adapter *adapter = netdev_priv(netdev); + struct net_device *v_netdev; + int i; + + /* disable TSO on all VLANs if they're present */ + if (!adapter->vlgrp) + goto tso_out; + + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + v_netdev = vlan_group_get_device(adapter->vlgrp, i); + if (!v_netdev) + continue; + + v_netdev->features &= ~feature_list; + vlan_group_set_device(adapter->vlgrp, i, v_netdev); + } + } + +tso_out: + +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + return 0; +} + +#endif /* NETIF_F_TSO */ +#ifdef ETHTOOL_GFLAGS +static int igb_set_flags(struct net_device *netdev, u32 data) +{ + u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | + ETH_FLAG_RXHASH; +#ifndef HAVE_VLAN_RX_REGISTER + u32 changed = netdev->features ^ data; +#endif + int rc; +#ifndef IGB_NO_LRO + + supported_flags |= ETH_FLAG_LRO; +#endif + /* + * Since there is no support for separate tx vlan accel + * enabled make sure tx flag is cleared if rx is. + */ + if (!(data & ETH_FLAG_RXVLAN)) + data &= ~ETH_FLAG_TXVLAN; + + rc = ethtool_op_set_flags(netdev, data, supported_flags); + if (rc) + return rc; +#ifndef HAVE_VLAN_RX_REGISTER + + if (changed & ETH_FLAG_RXVLAN) + igb_vlan_mode(netdev, data); +#endif + + return 0; +} + +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_SADV_COAL +static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + switch (edata->data) { + case IGB_DMAC_DISABLE: + adapter->dmac = edata->data; + break; + case IGB_DMAC_MIN: + adapter->dmac = edata->data; + break; + case IGB_DMAC_500: + adapter->dmac = edata->data; + break; + case IGB_DMAC_EN_DEFAULT: + adapter->dmac = edata->data; + break; + case IGB_DMAC_2000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_3000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_4000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_5000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_6000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_7000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_8000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_9000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_MAX: + adapter->dmac = edata->data; + break; + default: + adapter->dmac = IGB_DMAC_DISABLE; + printk("set_dmac: invalid setting, setting DMAC to %d\n", + adapter->dmac); + } + printk("%s: setting DMAC to %d\n", netdev->name, adapter->dmac); + return 0; +} +#endif /* ETHTOOL_SADV_COAL */ +#ifdef ETHTOOL_GADV_COAL +static void igb_get_dmac(struct net_device *netdev, + struct ethtool_value *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + edata->data = adapter->dmac; + + return; +} +#endif + +#ifdef ETHTOOL_GEEE +static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ret_val; + u16 phy_data; + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + return -EOPNOTSUPP; + + edata->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full); + + if (!hw->dev_spec._82575.eee_disable) + edata->advertised = + mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); + + /* The IPCNFG and EEER registers are not supported on I354. */ + if (hw->mac.type == e1000_i354) { + e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active); + } else { + u32 eeer; + + eeer = E1000_READ_REG(hw, E1000_EEER); + + /* EEE status on negotiated link */ + if (eeer & E1000_EEER_EEE_NEG) + edata->eee_active = true; + + if (eeer & E1000_EEER_TX_LPI_EN) + edata->tx_lpi_enabled = true; + } + + /* EEE Link Partner Advertised */ + switch (hw->mac.type) { + case e1000_i350: + ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + case e1000_i354: + case e1000_i210: + case e1000_i211: + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210, + E1000_EEE_LP_ADV_DEV_I210, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + default: + break; + } + + edata->eee_enabled = !hw->dev_spec._82575.eee_disable; + + if ((hw->mac.type == e1000_i354) && + (edata->eee_enabled)) + edata->tx_lpi_enabled = true; + + /* + * report correct negotiated EEE status for devices that + * wrongly report EEE at half-duplex + */ + if (adapter->link_duplex == HALF_DUPLEX) { + edata->eee_enabled = false; + edata->eee_active = false; + edata->tx_lpi_enabled = false; + edata->advertised &= ~edata->advertised; + } + + return 0; +} +#endif + +#ifdef ETHTOOL_SEEE +static int igb_set_eee(struct net_device *netdev, + struct ethtool_eee *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct ethtool_eee eee_curr; + s32 ret_val; + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + return -EOPNOTSUPP; + + ret_val = igb_get_eee(netdev, &eee_curr); + if (ret_val) + return ret_val; + + if (eee_curr.eee_enabled) { + if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE tx-lpi is not supported\n"); + return -EINVAL; + } + + /* Tx LPI time is not implemented currently */ + if (edata->tx_lpi_timer) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE Tx LPI timer is not supported\n"); + return -EINVAL; + } + + if (edata->advertised & + ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { + dev_err(pci_dev_to_dev(adapter->pdev), + "EEE Advertisement supports only 100Tx and or 100T full duplex\n"); + return -EINVAL; + } + + } else if (!edata->eee_enabled) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE options is not supported with EEE disabled\n"); + return -EINVAL; + } + + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + + if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) { + hw->dev_spec._82575.eee_disable = !edata->eee_enabled; + + /* reset link */ + if (netif_running(netdev)) + igb_reinit_locked(adapter); + else + igb_reset(adapter); + } + + return 0; +} +#endif /* ETHTOOL_SEEE */ + +#ifdef ETHTOOL_GRXRINGS +static int igb_get_rss_hash_opts(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + cmd->data = 0; + + /* Report default options for RSS on igb */ + switch (cmd->flow_type) { + case TCP_V4_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V4_FLOW: + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case IPV4_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V6_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V6_FLOW: + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, +#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS + void *rule_locs) +#else + u32 *rule_locs) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = adapter->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXFH: + ret = igb_get_rss_hash_opts(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \ + IGB_FLAG_RSS_FIELD_IPV6_UDP) +static int igb_set_rss_hash_opt(struct igb_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + u32 flags = adapter->flags; + + /* + * RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP; + break; + default: + return -EINVAL; + } + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (flags != adapter->flags) { + struct e1000_hw *hw = &adapter->hw; + u32 mrqc = E1000_READ_REG(hw, E1000_MRQC); + + if ((flags & UDP_RSS_FLAGS) && + !(adapter->flags & UDP_RSS_FLAGS)) + DPRINTK(DRV, WARNING, + "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); + + adapter->flags = flags; + + /* Perform hash on these packet types */ + mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP; + + mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP | + E1000_MRQC_RSS_FIELD_IPV6_UDP); + + if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; + + if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); + } + + return 0; +} + +static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct igb_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + ret = igb_set_rss_hash_opt(adapter, cmd); + break; + default: + break; + } + + return ret; +} +#endif /* ETHTOOL_GRXRINGS */ + +static const struct ethtool_ops igb_ethtool_ops = { + .get_settings = igb_get_settings, + .set_settings = igb_set_settings, + .get_drvinfo = igb_get_drvinfo, + .get_regs_len = igb_get_regs_len, + .get_regs = igb_get_regs, + .get_wol = igb_get_wol, + .set_wol = igb_set_wol, + .get_msglevel = igb_get_msglevel, + .set_msglevel = igb_set_msglevel, + .nway_reset = igb_nway_reset, + .get_link = igb_get_link, + .get_eeprom_len = igb_get_eeprom_len, + .get_eeprom = igb_get_eeprom, + .set_eeprom = igb_set_eeprom, + .get_ringparam = igb_get_ringparam, + .set_ringparam = igb_set_ringparam, + .get_pauseparam = igb_get_pauseparam, + .set_pauseparam = igb_set_pauseparam, + .self_test = igb_diag_test, + .get_strings = igb_get_strings, +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + .set_phys_id = igb_set_phys_id, +#else + .phys_id = igb_phys_id, +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef HAVE_ETHTOOL_GET_SSET_COUNT + .get_sset_count = igb_get_sset_count, +#else + .get_stats_count = igb_get_stats_count, + .self_test_count = igb_diag_test_count, +#endif + .get_ethtool_stats = igb_get_ethtool_stats, +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif + .get_coalesce = igb_get_coalesce, + .set_coalesce = igb_set_coalesce, +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef HAVE_ETHTOOL_GET_TS_INFO + .get_ts_info = igb_get_ts_info, +#endif /* HAVE_ETHTOOL_GET_TS_INFO */ +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef CONFIG_PM_RUNTIME + .begin = igb_ethtool_begin, + .complete = igb_ethtool_complete, +#endif /* CONFIG_PM_RUNTIME */ +#ifndef HAVE_NDO_SET_FEATURES + .get_rx_csum = igb_get_rx_csum, + .set_rx_csum = igb_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = igb_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, + .set_tso = igb_set_tso, +#endif +#ifdef ETHTOOL_GFLAGS + .get_flags = ethtool_op_get_flags, + .set_flags = igb_set_flags, +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GADV_COAL + .get_advcoal = igb_get_adv_coal, + .set_advcoal = igb_set_dmac_coal, +#endif /* ETHTOOL_GADV_COAL */ +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef ETHTOOL_GEEE + .get_eee = igb_get_eee, +#endif +#ifdef ETHTOOL_SEEE + .set_eee = igb_set_eee, +#endif +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef ETHTOOL_GRXRINGS + .get_rxnfc = igb_get_rxnfc, + .set_rxnfc = igb_set_rxnfc, +#endif +}; + +#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +static const struct ethtool_ops_ext igb_ethtool_ops_ext = { + .size = sizeof(struct ethtool_ops_ext), + .get_ts_info = igb_get_ts_info, + .set_phys_id = igb_set_phys_id, + .get_eee = igb_get_eee, + .set_eee = igb_set_eee, +}; + +void igb_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops); + set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext); +} +#else +void igb_set_ethtool_ops(struct net_device *netdev) +{ + /* have to "undeclare" const on this struct to remove warnings */ + SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops); +} +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#endif /* SIOCETHTOOL */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c new file mode 100644 index 00000000..07a1ae07 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c @@ -0,0 +1,260 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "e1000_82575.h" +#include "e1000_hw.h" +#ifdef IGB_HWMON +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_I2C_SUPPORT +static struct i2c_board_info i350_sensor_info = { + I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), +}; +#endif /* HAVE_I2C_SUPPORT */ + +/* hwmon callback functions */ +static ssize_t igb_hwmon_show_location(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + return sprintf(buf, "loc%u\n", + igb_attr->sensor->location); +} + +static ssize_t igb_hwmon_show_temp(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value; + + /* reset the temp field */ + igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); + + value = igb_attr->sensor->temp; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value = igb_attr->sensor->caution_thresh; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value = igb_attr->sensor->max_op_thresh; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. + * @ adapter: pointer to the adapter structure + * @ offset: offset in the eeprom sensor data table + * @ type: type of sensor data to display + * + * For each file we want in hwmon's sysfs interface we need a device_attribute + * This is included in our hwmon_attr struct that contains the references to + * the data structures we need to get the data to display. + */ +static int igb_add_hwmon_attr(struct igb_adapter *adapter, + unsigned int offset, int type) { + int rc; + unsigned int n_attr; + struct hwmon_attr *igb_attr; + + n_attr = adapter->igb_hwmon_buff.n_hwmon; + igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr]; + + switch (type) { + case IGB_HWMON_TYPE_LOC: + igb_attr->dev_attr.show = igb_hwmon_show_location; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_label", offset); + break; + case IGB_HWMON_TYPE_TEMP: + igb_attr->dev_attr.show = igb_hwmon_show_temp; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_input", offset); + break; + case IGB_HWMON_TYPE_CAUTION: + igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_max", offset); + break; + case IGB_HWMON_TYPE_MAX: + igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_crit", offset); + break; + default: + rc = -EPERM; + return rc; + } + + /* These always the same regardless of type */ + igb_attr->sensor = + &adapter->hw.mac.thermal_sensor_data.sensor[offset]; + igb_attr->hw = &adapter->hw; + igb_attr->dev_attr.store = NULL; + igb_attr->dev_attr.attr.mode = S_IRUGO; + igb_attr->dev_attr.attr.name = igb_attr->name; + sysfs_attr_init(&igb_attr->dev_attr.attr); + rc = device_create_file(&adapter->pdev->dev, + &igb_attr->dev_attr); + if (rc == 0) + ++adapter->igb_hwmon_buff.n_hwmon; + + return rc; +} + +static void igb_sysfs_del_adapter(struct igb_adapter *adapter) +{ + int i; + + if (adapter == NULL) + return; + + for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) { + device_remove_file(&adapter->pdev->dev, + &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr); + } + + kfree(adapter->igb_hwmon_buff.hwmon_list); + + if (adapter->igb_hwmon_buff.device) + hwmon_device_unregister(adapter->igb_hwmon_buff.device); +} + +/* called from igb_main.c */ +void igb_sysfs_exit(struct igb_adapter *adapter) +{ + igb_sysfs_del_adapter(adapter); +} + +/* called from igb_main.c */ +int igb_sysfs_init(struct igb_adapter *adapter) +{ + struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff; + unsigned int i; + int n_attrs; + int rc = 0; +#ifdef HAVE_I2C_SUPPORT + struct i2c_client *client = NULL; +#endif /* HAVE_I2C_SUPPORT */ + + /* If this method isn't defined we don't support thermals */ + if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) + goto exit; + + /* Don't create thermal hwmon interface if no sensors present */ + rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); + if (rc) + goto exit; +#ifdef HAVE_I2C_SUPPORT + /* init i2c_client */ + client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); + if (client == NULL) { + dev_info(&adapter->pdev->dev, + "Failed to create new i2c device..\n"); + goto exit; + } + adapter->i2c_client = client; +#endif /* HAVE_I2C_SUPPORT */ + + /* Allocation space for max attributes + * max num sensors * values (loc, temp, max, caution) + */ + n_attrs = E1000_MAX_SENSORS * 4; + igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), + GFP_KERNEL); + if (!igb_hwmon->hwmon_list) { + rc = -ENOMEM; + goto err; + } + + igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev); + if (IS_ERR(igb_hwmon->device)) { + rc = PTR_ERR(igb_hwmon->device); + goto err; + } + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + + /* Only create hwmon sysfs entries for sensors that have + * meaningful data. + */ + if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) + continue; + + /* Bail if any hwmon attr struct fails to initialize */ + rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); + if (rc) + goto err; + } + + goto exit; + +err: + igb_sysfs_del_adapter(adapter); +exit: + return rc; +} +#endif /* IGB_HWMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c new file mode 100644 index 00000000..96acec58 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -0,0 +1,10291 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#ifdef NETIF_F_TSO +#include +#ifdef NETIF_F_TSO6 +#include +#include +#endif +#endif +#ifdef SIOCGMIIPHY +#include +#endif +#ifdef SIOCETHTOOL +#include +#endif +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif /* CONFIG_PM_RUNTIME */ + +#include +#include "igb.h" +#include "igb_vmdq.h" + +#include + +#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR) +#define DRV_DEBUG "_debug" +#else +#define DRV_DEBUG +#endif +#define DRV_HW_PERF +#define VERSION_SUFFIX + +#define MAJ 5 +#define MIN 0 +#define BUILD 6 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF + +char igb_driver_name[] = "igb"; +char igb_driver_version[] = DRV_VERSION; +static const char igb_driver_string[] = + "Intel(R) Gigabit Ethernet Network Driver"; +static const char igb_copyright[] = + "Copyright (c) 2007-2013 Intel Corporation."; + +static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) }, + /* required last entry */ + {0, } +}; + +//MODULE_DEVICE_TABLE(pci, igb_pci_tbl); +static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__)); +void igb_reset(struct igb_adapter *); +static int igb_setup_all_tx_resources(struct igb_adapter *); +static int igb_setup_all_rx_resources(struct igb_adapter *); +static void igb_free_all_tx_resources(struct igb_adapter *); +static void igb_free_all_rx_resources(struct igb_adapter *); +static void igb_setup_mrqc(struct igb_adapter *); +void igb_update_stats(struct igb_adapter *); +static int igb_probe(struct pci_dev *, const struct pci_device_id *); +static void __devexit igb_remove(struct pci_dev *pdev); +static int igb_sw_init(struct igb_adapter *); +static int igb_open(struct net_device *); +static int igb_close(struct net_device *); +static void igb_configure(struct igb_adapter *); +static void igb_configure_tx(struct igb_adapter *); +static void igb_configure_rx(struct igb_adapter *); +static void igb_clean_all_tx_rings(struct igb_adapter *); +static void igb_clean_all_rx_rings(struct igb_adapter *); +static void igb_clean_tx_ring(struct igb_ring *); +static void igb_set_rx_mode(struct net_device *); +static void igb_update_phy_info(unsigned long); +static void igb_watchdog(unsigned long); +static void igb_watchdog_task(struct work_struct *); +static void igb_dma_err_task(struct work_struct *); +static void igb_dma_err_timer(unsigned long data); +static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); +static struct net_device_stats *igb_get_stats(struct net_device *); +static int igb_change_mtu(struct net_device *, int); +void igb_full_sync_mac_table(struct igb_adapter *adapter); +static int igb_set_mac(struct net_device *, void *); +static void igb_set_uta(struct igb_adapter *adapter); +static irqreturn_t igb_intr(int irq, void *); +static irqreturn_t igb_intr_msi(int irq, void *); +static irqreturn_t igb_msix_other(int irq, void *); +static irqreturn_t igb_msix_ring(int irq, void *); +#ifdef IGB_DCA +static void igb_update_dca(struct igb_q_vector *); +static void igb_setup_dca(struct igb_adapter *); +#endif /* IGB_DCA */ +static int igb_poll(struct napi_struct *, int); +static bool igb_clean_tx_irq(struct igb_q_vector *); +static bool igb_clean_rx_irq(struct igb_q_vector *, int); +static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); +static void igb_tx_timeout(struct net_device *); +static void igb_reset_task(struct work_struct *); +#ifdef HAVE_VLAN_RX_REGISTER +static void igb_vlan_mode(struct net_device *, struct vlan_group *); +#endif +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_add_vid(struct net_device *, + __be16 proto, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, + __be16 proto, u16); +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_add_vid(struct net_device *, + __always_unused __be16 proto, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, + __always_unused __be16 proto, u16); +#else +static int igb_vlan_rx_add_vid(struct net_device *, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, u16); +#endif +#else +static void igb_vlan_rx_add_vid(struct net_device *, u16); +static void igb_vlan_rx_kill_vid(struct net_device *, u16); +#endif +static void igb_restore_vlan(struct igb_adapter *); +void igb_rar_set(struct igb_adapter *adapter, u32 index); +static void igb_ping_all_vfs(struct igb_adapter *); +static void igb_msg_task(struct igb_adapter *); +static void igb_vmm_control(struct igb_adapter *); +static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); +static void igb_restore_vf_multicasts(struct igb_adapter *adapter); +static void igb_process_mdd_event(struct igb_adapter *); +#ifdef IFLA_VF_MAX +static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac); +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos); +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting); +#endif +#ifdef HAVE_VF_MIN_MAX_TXRATE +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); +#else /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +#endif /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, + struct ifla_vf_info *ivi); +static void igb_check_vf_rate_limit(struct igb_adapter *); +#endif +static int igb_vf_configure(struct igb_adapter *adapter, int vf); +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_suspend(struct device *dev); +static int igb_resume(struct device *dev); +#ifdef CONFIG_PM_RUNTIME +static int igb_runtime_suspend(struct device *dev); +static int igb_runtime_resume(struct device *dev); +static int igb_runtime_idle(struct device *dev); +#endif /* CONFIG_PM_RUNTIME */ +static const struct dev_pm_ops igb_pm_ops = { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) + .suspend = igb_suspend, + .resume = igb_resume, + .freeze = igb_suspend, + .thaw = igb_resume, + .poweroff = igb_suspend, + .restore = igb_resume, +#ifdef CONFIG_PM_RUNTIME + .runtime_suspend = igb_runtime_suspend, + .runtime_resume = igb_runtime_resume, + .runtime_idle = igb_runtime_idle, +#endif +#else /* Linux >= 2.6.34 */ + SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) +#ifdef CONFIG_PM_RUNTIME + SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, + igb_runtime_idle) +#endif /* CONFIG_PM_RUNTIME */ +#endif /* Linux version */ +}; +#else +static int igb_suspend(struct pci_dev *pdev, pm_message_t state); +static int igb_resume(struct pci_dev *pdev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM */ +#ifndef USE_REBOOT_NOTIFIER +static void igb_shutdown(struct pci_dev *); +#else +static int igb_notify_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block igb_notifier_reboot = { + .notifier_call = igb_notify_reboot, + .next = NULL, + .priority = 0 +}; +#endif +#ifdef IGB_DCA +static int igb_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = igb_notify_dca, + .next = NULL, + .priority = 0 +}; +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER +/* for netdump / net console */ +static void igb_netpoll(struct net_device *); +#endif + +#ifdef HAVE_PCI_ERS +static pci_ers_result_t igb_io_error_detected(struct pci_dev *, + pci_channel_state_t); +static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); +static void igb_io_resume(struct pci_dev *); + +static struct pci_error_handlers igb_err_handler = { + .error_detected = igb_io_error_detected, + .slot_reset = igb_io_slot_reset, + .resume = igb_io_resume, +}; +#endif + +static void igb_init_fw(struct igb_adapter *adapter); +static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); + +static struct pci_driver igb_driver = { + .name = igb_driver_name, + .id_table = igb_pci_tbl, + .probe = igb_probe, + .remove = __devexit_p(igb_remove), +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + .driver.pm = &igb_pm_ops, +#else + .suspend = igb_suspend, + .resume = igb_resume, +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM */ +#ifndef USE_REBOOT_NOTIFIER + .shutdown = igb_shutdown, +#endif +#ifdef HAVE_PCI_ERS + .err_handler = &igb_err_handler +#endif +}; + +//MODULE_AUTHOR("Intel Corporation, "); +//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); +//MODULE_LICENSE("GPL"); +//MODULE_VERSION(DRV_VERSION); + +static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie; + u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; + u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); + u32 vfta; + + /* + * if this is the management vlan the only option is to add it in so + * that the management pass through will continue to work + */ + if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && + (vid == mng_cookie->vlan_id)) + add = TRUE; + + vfta = adapter->shadow_vfta[index]; + + if (add) + vfta |= mask; + else + vfta &= ~mask; + + e1000_write_vfta(hw, index, vfta); + adapter->shadow_vfta[index] = vfta; +} + +static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; +//module_param(debug, int, 0); +//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)"); + +/** + * igb_init_module - Driver Registration Routine + * + * igb_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. + **/ +static int __init igb_init_module(void) +{ + int ret; + + printk(KERN_INFO "%s - version %s\n", + igb_driver_string, igb_driver_version); + + printk(KERN_INFO "%s\n", igb_copyright); +#ifdef IGB_HWMON +/* only use IGB_PROCFS if IGB_HWMON is not defined */ +#else +#ifdef IGB_PROCFS + if (igb_procfs_topdir_init()) + printk(KERN_INFO "Procfs failed to initialize topdir\n"); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + +#ifdef IGB_DCA + dca_register_notify(&dca_notifier); +#endif + ret = pci_register_driver(&igb_driver); +#ifdef USE_REBOOT_NOTIFIER + if (ret >= 0) { + register_reboot_notifier(&igb_notifier_reboot); + } +#endif + return ret; +} + +#undef module_init +#define module_init(x) static int x(void) __attribute__((__unused__)); +module_init(igb_init_module); + +/** + * igb_exit_module - Driver Exit Cleanup Routine + * + * igb_exit_module is called just before the driver is removed + * from memory. + **/ +static void __exit igb_exit_module(void) +{ +#ifdef IGB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef USE_REBOOT_NOTIFIER + unregister_reboot_notifier(&igb_notifier_reboot); +#endif + pci_unregister_driver(&igb_driver); + +#ifdef IGB_HWMON +/* only compile IGB_PROCFS if IGB_HWMON is not defined */ +#else +#ifdef IGB_PROCFS + igb_procfs_topdir_exit(); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ +} + +#undef module_exit +#define module_exit(x) static void x(void) __attribute__((__unused__)); +module_exit(igb_exit_module); + +#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) +/** + * igb_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + **/ +static void igb_cache_ring_register(struct igb_adapter *adapter) +{ + int i = 0, j = 0; + u32 rbase_offset = adapter->vfs_allocated_count; + + switch (adapter->hw.mac.type) { + case e1000_82576: + /* The queues are allocated for virtualization such that VF 0 + * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. + * In order to avoid collision we start at the first free queue + * and continue consuming queues in the same sequence + */ + if ((adapter->rss_queues > 1) && adapter->vmdq_pools) { + for (; i < adapter->rss_queues; i++) + adapter->rx_ring[i]->reg_idx = rbase_offset + + Q_IDX_82576(i); + } + case e1000_82575: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = rbase_offset + i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = rbase_offset + j; + break; + } +} + +static void igb_configure_lli(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 port; + + /* LLI should only be enabled for MSI-X or MSI interrupts */ + if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI)) + return; + + if (adapter->lli_port) { + /* use filter 0 for port */ + port = htons((u16)adapter->lli_port); + E1000_WRITE_REG(hw, E1000_IMIR(0), + (port | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(0), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + } + + if (adapter->flags & IGB_FLAG_LLI_PUSH) { + /* use filter 1 for push flag */ + E1000_WRITE_REG(hw, E1000_IMIR(1), + (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(1), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH)); + } + + if (adapter->lli_size) { + /* use filter 2 for size */ + E1000_WRITE_REG(hw, E1000_IMIR(2), + (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(2), + (adapter->lli_size | E1000_IMIREXT_CTRL_BP)); + } + +} + +/** + * igb_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset of in IVAR, should be multiple of 8 + * + * This function is intended to handle the writing of the IVAR register + * for adapters 82576 and newer. The IVAR table consists of 2 columns, + * each containing an cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + **/ +static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | E1000_IVAR_VALID) << offset; + + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); +} + +#define IGB_N0_QUEUE -1 +static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + int rx_queue = IGB_N0_QUEUE; + int tx_queue = IGB_N0_QUEUE; + u32 msixbm = 0; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case e1000_82575: + /* The 82575 assigns vectors using a bitmask, which matches the + bitmask for the EICR/EIMS/EIMC registers. To assign one + or more queues to a vector, we write the appropriate bits + into the MSIXBM register for that vector. */ + if (rx_queue > IGB_N0_QUEUE) + msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; + if (tx_queue > IGB_N0_QUEUE) + msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; + if (!adapter->msix_entries && msix_vector == 0) + msixbm |= E1000_EIMS_OTHER; + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm); + q_vector->eims_value = msixbm; + break; + case e1000_82576: + /* + * 82576 uses a table that essentially consists of 2 columns + * with 8 rows. The ordering is column-major so we use the + * lower 3 bits as the row index, and the 4th bit as the + * column offset. + */ + if (rx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + rx_queue & 0x7, + (rx_queue & 0x8) << 1); + if (tx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + tx_queue & 0x7, + ((tx_queue & 0x8) << 1) + 8); + q_vector->eims_value = 1 << msix_vector; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* + * On 82580 and newer adapters the scheme is similar to 82576 + * however instead of ordering column-major we have things + * ordered row-major. So we traverse the table by using + * bit 0 as the column offset, and the remaining bits as the + * row index. + */ + if (rx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = 1 << msix_vector; + break; + default: + BUG(); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igb_configure_msix - Configure MSI-X hardware + * + * igb_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + **/ +static void igb_configure_msix(struct igb_adapter *adapter) +{ + u32 tmp; + int i, vector = 0; + struct e1000_hw *hw = &adapter->hw; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case e1000_82575: + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + /* enable MSI-X PBA support*/ + tmp |= E1000_CTRL_EXT_PBA_CLR; + + /* Auto-Mask interrupts upon ICR read. */ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* enable msix_other interrupt */ + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++, + E1000_EIMS_OTHER); + adapter->eims_other = E1000_EIMS_OTHER; + + break; + + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. */ + E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = 1 << vector; + tmp = (vector++ | E1000_IVAR_VALID) << 8; + + E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igb_assign_vector(adapter->q_vector[i], vector++); + + E1000_WRITE_FLUSH(hw); +} + +/** + * igb_request_msix - Initialize MSI-X interrupts + * + * igb_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. + **/ +static int igb_request_msix(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + int i, err = 0, vector = 0, free_vector = 0; + + err = request_irq(adapter->msix_entries[vector].vector, + &igb_msix_other, 0, netdev->name, adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + + vector++; + + q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); + + if (q_vector->rx.ring && q_vector->tx.ring) + sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else if (q_vector->tx.ring) + sprintf(q_vector->name, "%s-tx-%u", netdev->name, + q_vector->tx.ring->queue_index); + else if (q_vector->rx.ring) + sprintf(q_vector->name, "%s-rx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else + sprintf(q_vector->name, "%s-unused", netdev->name); + + err = request_irq(adapter->msix_entries[vector].vector, + igb_msix_ring, 0, q_vector->name, + q_vector); + if (err) + goto err_free; + } + + igb_configure_msix(adapter); + return 0; + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: + return err; +} + +static void igb_reset_interrupt_capability(struct igb_adapter *adapter) +{ + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } +} + +/** + * igb_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; + + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); +#ifndef IGB_NO_LRO + __skb_queue_purge(&q_vector->lrolist.active); +#endif + kfree(q_vector); +} + +/** + * igb_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void igb_free_q_vectors(struct igb_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); +} + +/** + * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * + * This function resets the device so that it has 0 rx queues, tx queues, and + * MSI-X interrupts allocated. + */ +static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) +{ + igb_free_q_vectors(adapter); + igb_reset_interrupt_capability(adapter); +} + +/** + * igb_process_mdd_event + * @adapter - board private structure + * + * Identify a malicious VF, disable the VF TX/RX queues and log a message. + */ +static void igb_process_mdd_event(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 lvmmc, vfte, vfre, mdfb; + u8 vf_queue; + + lvmmc = E1000_READ_REG(hw, E1000_LVMMC); + vf_queue = lvmmc >> 29; + + /* VF index cannot be bigger or equal to VFs allocated */ + if (vf_queue >= adapter->vfs_allocated_count) + return; + + netdev_info(adapter->netdev, + "VF %d misbehaved. VF queues are disabled. " + "VM misbehavior code is 0x%x\n", vf_queue, lvmmc); + + /* Disable VFTE and VFRE related bits */ + vfte = E1000_READ_REG(hw, E1000_VFTE); + vfte &= ~(1 << vf_queue); + E1000_WRITE_REG(hw, E1000_VFTE, vfte); + + vfre = E1000_READ_REG(hw, E1000_VFRE); + vfre &= ~(1 << vf_queue); + E1000_WRITE_REG(hw, E1000_VFRE, vfre); + + /* Disable MDFB related bit. Clear on write */ + mdfb = E1000_READ_REG(hw, E1000_MDFB); + mdfb |= (1 << vf_queue); + E1000_WRITE_REG(hw, E1000_MDFB, mdfb); + + /* Reset the specific VF */ + E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST); +} + +/** + * igb_disable_mdd + * @adapter - board private structure + * + * Disable MDD behavior in the HW + **/ +static void igb_disable_mdd(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + if ((hw->mac.type != e1000_i350) || + (hw->mac.type != e1000_i354)) + return; + + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg &= (~E1000_DTXCTL_MDP_EN); + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); +} + +/** + * igb_enable_mdd + * @adapter - board private structure + * + * Enable the HW to detect malicious driver and sends an interrupt to + * the driver. + **/ +static void igb_enable_mdd(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + /* Only available on i350 device */ + if (hw->mac.type != e1000_i350) + return; + + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg |= E1000_DTXCTL_MDP_EN; + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); +} + +/** + * igb_reset_sriov_capability - disable SR-IOV if enabled + * + * Attempt to disable single root IO virtualization capabilites present in the + * kernel. + **/ +static void igb_reset_sriov_capability(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + + /* reclaim resources allocated to VFs */ + if (adapter->vf_data) { + if (!pci_vfs_assigned(pdev)) { + /* + * disable iov and allow time for transactions to + * clear + */ + pci_disable_sriov(pdev); + msleep(500); + + dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n"); + } else { + dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n " + "VF(s) are assigned to guests!\n"); + } + /* Disable Malicious Driver Detection */ + igb_disable_mdd(adapter); + + /* free vf data storage */ + kfree(adapter->vf_data); + adapter->vf_data = NULL; + + /* switch rings back to PF ownership */ + E1000_WRITE_REG(hw, E1000_IOVCTL, + E1000_IOVCTL_REUSE_VFQ); + E1000_WRITE_FLUSH(hw); + msleep(100); + } + + adapter->vfs_allocated_count = 0; +} + +/** + * igb_set_sriov_capability - setup SR-IOV if supported + * + * Attempt to enable single root IO virtualization capabilites present in the + * kernel. + **/ +static void igb_set_sriov_capability(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int old_vfs = 0; + int i; + + old_vfs = pci_num_vf(pdev); + if (old_vfs) { + dev_info(pci_dev_to_dev(pdev), + "%d pre-allocated VFs found - override " + "max_vfs setting of %d\n", old_vfs, + adapter->vfs_allocated_count); + adapter->vfs_allocated_count = old_vfs; + } + /* no VFs requested, do nothing */ + if (!adapter->vfs_allocated_count) + return; + + /* allocate vf data storage */ + adapter->vf_data = kcalloc(adapter->vfs_allocated_count, + sizeof(struct vf_data_storage), + GFP_KERNEL); + + if (adapter->vf_data) { + if (!old_vfs) { + if (pci_enable_sriov(pdev, + adapter->vfs_allocated_count)) + goto err_out; + } + for (i = 0; i < adapter->vfs_allocated_count; i++) + igb_vf_configure(adapter, i); + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_i350: + /* Enable VM to VM loopback by default */ + adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; + break; + default: + /* Currently no other hardware supports loopback */ + break; + } + + /* DMA Coalescing is not supported in IOV mode. */ + if (adapter->hw.mac.type >= e1000_i350) + adapter->dmac = IGB_DMAC_DISABLE; + if (adapter->hw.mac.type < e1000_i350) + adapter->flags |= IGB_FLAG_DETECT_BAD_DMA; + return; + + } + +err_out: + kfree(adapter->vf_data); + adapter->vf_data = NULL; + adapter->vfs_allocated_count = 0; + dev_warn(pci_dev_to_dev(pdev), + "Failed to initialize SR-IOV virtualization\n"); +} + +/** + * igb_set_interrupt_capability - set MSI or MSI-X if supported + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + **/ +static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err; + int numvecs, i; + + if (!msix) + adapter->int_mode = IGB_INT_MODE_MSI; + + /* Number of supported queues. */ + adapter->num_rx_queues = adapter->rss_queues; + + if (adapter->vmdq_pools > 1) + adapter->num_rx_queues += adapter->vmdq_pools - 1; + +#ifdef HAVE_TX_MQ + if (adapter->vmdq_pools) + adapter->num_tx_queues = adapter->vmdq_pools; + else + adapter->num_tx_queues = adapter->num_rx_queues; +#else + adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools); +#endif + + switch (adapter->int_mode) { + case IGB_INT_MODE_MSIX: + /* start with one vector for every rx queue */ + numvecs = adapter->num_rx_queues; + + /* if tx handler is separate add 1 for every tx queue */ + if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + adapter->msix_entries = kcalloc(numvecs, + sizeof(struct msix_entry), + GFP_KERNEL); + if (adapter->msix_entries) { + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix(pdev, + adapter->msix_entries, numvecs); + if (err == 0) + break; + } + /* MSI-X failed, so fall through and try MSI */ + dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. " + "Falling back to MSI interrupts.\n"); + igb_reset_interrupt_capability(adapter); + case IGB_INT_MODE_MSI: + if (!pci_enable_msi(pdev)) + adapter->flags |= IGB_FLAG_HAS_MSI; + else + dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI " + "interrupts. Falling back to legacy " + "interrupts.\n"); + /* Fall through */ + case IGB_INT_MODE_LEGACY: + /* disable advanced features and set number of queues to 1 */ + igb_reset_sriov_capability(adapter); + adapter->vmdq_pools = 0; + adapter->rss_queues = 1; + adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + /* Don't do anything; this is system default */ + break; + } +} + +static void igb_add_ring(struct igb_ring *ring, + struct igb_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igb_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + **/ +static int igb_alloc_q_vector(struct igb_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igb_q_vector *q_vector; + struct igb_ring *ring; + int ring_count, size; + + /* igb only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igb_q_vector) + + (sizeof(struct igb_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + +#ifndef IGB_NO_LRO + /* initialize LRO */ + __skb_queue_head_init(&q_vector->lrolist.active); + +#endif + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igb_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_val = IGB_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* intialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igb_add_ring(ring, &q_vector->tx); + + /* For 82575, context index must be unique per ring. */ + if (adapter->hw.mac.type == e1000_82575) + set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igb_add_ring(ring, &q_vector->rx); + +#ifndef HAVE_NDO_SET_FEATURES + /* enable rx checksum */ + set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags); + +#endif + /* set flag indicating ring supports SCTP checksum offload */ + if (adapter->hw.mac.type >= e1000_82576) + set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); + + if ((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) + set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + **/ +static int igb_alloc_q_vectors(struct igb_adapter *adapter) +{ + int q_vectors = adapter->num_q_vectors; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * + * This function initializes the interrupts and allocates all of the queues. + **/ +static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err; + + igb_set_interrupt_capability(adapter, msix); + + err = igb_alloc_q_vectors(adapter); + if (err) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igb_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igb_reset_interrupt_capability(adapter); + return err; +} + +/** + * igb_request_irq - initialize interrupts + * + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. + **/ +static int igb_request_irq(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + if (adapter->msix_entries) { + err = igb_request_msix(adapter); + if (!err) + goto request_done; + /* fall back to MSI */ + igb_free_all_tx_resources(adapter); + igb_free_all_rx_resources(adapter); + + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + err = igb_init_interrupt_scheme(adapter, false); + if (err) + goto request_done; + igb_setup_all_tx_resources(adapter); + igb_setup_all_rx_resources(adapter); + igb_configure(adapter); + } + + igb_assign_vector(adapter->q_vector[0], 0); + + if (adapter->flags & IGB_FLAG_HAS_MSI) { + err = request_irq(pdev->irq, &igb_intr_msi, 0, + netdev->name, adapter); + if (!err) + goto request_done; + + /* fall back to legacy interrupts */ + igb_reset_interrupt_capability(adapter); + adapter->flags &= ~IGB_FLAG_HAS_MSI; + } + + err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED, + netdev->name, adapter); + + if (err) + dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n", + err); + +request_done: + return err; +} + +static void igb_free_irq(struct igb_adapter *adapter) +{ + if (adapter->msix_entries) { + int vector = 0, i; + + free_irq(adapter->msix_entries[vector++].vector, adapter); + + for (i = 0; i < adapter->num_q_vectors; i++) + free_irq(adapter->msix_entries[vector++].vector, + adapter->q_vector[i]); + } else { + free_irq(adapter->pdev->irq, adapter); + } +} + +/** + * igb_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + **/ +static void igb_irq_disable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * we need to be careful when disabling interrupts. The VFs are also + * mapped into these registers and so clearing the bits can cause + * issues on the VF drivers so we only need to clear what we set + */ + if (adapter->msix_entries) { + u32 regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask); + E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask); + regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask); + } + + E1000_WRITE_REG(hw, E1000_IAM, 0); + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +/** + * igb_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + **/ +static void igb_irq_enable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; + u32 regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask); + regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask); + E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask); + if (adapter->vfs_allocated_count) { + E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF); + ims |= E1000_IMS_VMMB; + if (adapter->mdd) + if ((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) + ims |= E1000_IMS_MDDET; + } + E1000_WRITE_REG(hw, E1000_IMS, ims); + } else { + E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK | + E1000_IMS_DRSTA); + E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK | + E1000_IMS_DRSTA); + } +} + +static void igb_update_mng_vlan(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 vid = adapter->hw.mng_cookie.vlan_id; + u16 old_vid = adapter->mng_vlan_id; + + if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { + /* add VID to filter table */ + igb_vfta_set(adapter, vid, TRUE); + adapter->mng_vlan_id = vid; + } else { + adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; + } + + if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && + (vid != old_vid) && +#ifdef HAVE_VLAN_RX_REGISTER + !vlan_group_get_device(adapter->vlgrp, old_vid)) { +#else + !test_bit(old_vid, adapter->active_vlans)) { +#endif + /* remove VID from filter table */ + igb_vfta_set(adapter, old_vid, FALSE); + } +} + +/** + * igb_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure + * + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + * + **/ +static void igb_release_hw_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); +} + +/** + * igb_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure + * + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + * + **/ +static void igb_get_hw_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); +} + +/** + * igb_configure - configure the hardware for RX and TX + * @adapter: private board structure + **/ +static void igb_configure(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i; + + igb_get_hw_control(adapter); + igb_set_rx_mode(netdev); + + igb_restore_vlan(adapter); + + igb_setup_tctl(adapter); + igb_setup_mrqc(adapter); + igb_setup_rctl(adapter); + + igb_configure_tx(adapter); + igb_configure_rx(adapter); + + e1000_rx_fifo_flush_82575(&adapter->hw); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (adapter->num_tx_queues > 1) + netdev->features |= NETIF_F_MULTI_QUEUE; + else + netdev->features &= ~NETIF_F_MULTI_QUEUE; +#endif + + /* call igb_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); + } +} + +/** + * igb_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure + **/ +void igb_power_up_link(struct igb_adapter *adapter) +{ + e1000_phy_hw_reset(&adapter->hw); + + if (adapter->hw.phy.media_type == e1000_media_type_copper) + e1000_power_up_phy(&adapter->hw); + else + e1000_power_up_fiber_serdes_link(&adapter->hw); +} + +/** + * igb_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure + */ +static void igb_power_down_link(struct igb_adapter *adapter) +{ + if (adapter->hw.phy.media_type == e1000_media_type_copper) + e1000_power_down_phy(&adapter->hw); + else + e1000_shutdown_fiber_serdes_link(&adapter->hw); +} + +/* Detect and switch function for Media Auto Sense */ +static void igb_check_swap_media(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext, connsw; + bool swap_now = false; + bool link; + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + connsw = E1000_READ_REG(hw, E1000_CONNSW); + link = igb_has_link(adapter); + + /* need to live swap if current media is copper and we have fiber/serdes + * to go to. + */ + + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { + swap_now = true; + } else if (!(connsw & E1000_CONNSW_SERDESD)) { + /* copper signal takes time to appear */ + if (adapter->copper_tries < 2) { + adapter->copper_tries++; + connsw |= E1000_CONNSW_AUTOSENSE_CONF; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + return; + } else { + adapter->copper_tries = 0; + if ((connsw & E1000_CONNSW_PHYSD) && + (!(connsw & E1000_CONNSW_PHY_PDN))) { + swap_now = true; + connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + } + } + } + + if (swap_now) { + switch (hw->phy.media_type) { + case e1000_media_type_copper: + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: changing media to fiber/serdes\n", + adapter->netdev->name); + ctrl_ext |= + E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + adapter->copper_tries = 0; + break; + case e1000_media_type_internal_serdes: + case e1000_media_type_fiber: + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: changing media to copper\n", + adapter->netdev->name); + ctrl_ext &= + ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + break; + default: + /* shouldn't get here during regular operation */ + dev_err(pci_dev_to_dev(adapter->pdev), + "%s:AMS: Invalid media type found, returning\n", + adapter->netdev->name); + break; + } + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + } +} + +#ifdef HAVE_I2C_SUPPORT +/* igb_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + */ +static int igb_get_i2c_data(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + return (i2cctl & E1000_I2C_DATA_IN) != 0; +} + +/* igb_set_i2c_data - Sets the I2C data bit + * @data: pointer to hardware structure + * @state: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + */ +static void igb_set_i2c_data(void *data, int state) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + if (state) + i2cctl |= E1000_I2C_DATA_OUT; + else + i2cctl &= ~E1000_I2C_DATA_OUT; + + i2cctl &= ~E1000_I2C_DATA_OE_N; + i2cctl |= E1000_I2C_CLK_OE_N; + + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); + +} + +/* igb_set_i2c_clk - Sets the I2C SCL clock + * @data: pointer to hardware structure + * @state: state to set clock + * + * Sets the I2C clock line to state + */ +static void igb_set_i2c_clk(void *data, int state) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + if (state) { + i2cctl |= E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; + } else { + i2cctl &= ~E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; + } + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); +} + +/* igb_get_i2c_clk - Gets the I2C SCL clock state + * @data: pointer to hardware structure + * + * Gets the I2C clock state + */ +static int igb_get_i2c_clk(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + return (i2cctl & E1000_I2C_CLK_IN) != 0; +} + +static const struct i2c_algo_bit_data igb_i2c_algo = { + .setsda = igb_set_i2c_data, + .setscl = igb_set_i2c_clk, + .getsda = igb_get_i2c_data, + .getscl = igb_get_i2c_clk, + .udelay = 5, + .timeout = 20, +}; + +/* igb_init_i2c - Init I2C interface + * @adapter: pointer to adapter structure + * + */ +static s32 igb_init_i2c(struct igb_adapter *adapter) +{ + s32 status = E1000_SUCCESS; + + /* I2C interface supported on i350 devices */ + if (adapter->hw.mac.type != e1000_i350) + return E1000_SUCCESS; + + /* Initialize the i2c bus which is controlled by the registers. + * This bus will use the i2c_algo_bit structue that implements + * the protocol through toggling of the 4 bits in the register. + */ + adapter->i2c_adap.owner = THIS_MODULE; + adapter->i2c_algo = igb_i2c_algo; + adapter->i2c_algo.data = adapter; + adapter->i2c_adap.algo_data = &adapter->i2c_algo; + adapter->i2c_adap.dev.parent = &adapter->pdev->dev; + strlcpy(adapter->i2c_adap.name, "igb BB", + sizeof(adapter->i2c_adap.name)); + status = i2c_bit_add_bus(&adapter->i2c_adap); + return status; +} + +#endif /* HAVE_I2C_SUPPORT */ +/** + * igb_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure + **/ +int igb_up(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + /* hardware has been reset, we need to reload some things */ + igb_configure(adapter); + + clear_bit(__IGB_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&(adapter->q_vector[i]->napi)); + + if (adapter->msix_entries) + igb_configure_msix(adapter); + else + igb_assign_vector(adapter->q_vector[0], 0); + + igb_configure_lli(adapter); + + /* Clear any pending interrupts. */ + E1000_READ_REG(hw, E1000_ICR); + igb_irq_enable(adapter); + + /* notify VFs that reset has been completed */ + if (adapter->vfs_allocated_count) { + u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); + } + + netif_tx_start_all_queues(adapter->netdev); + + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + schedule_work(&adapter->dma_err_task); + /* start the watchdog. */ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + if ((adapter->flags & IGB_FLAG_EEE) && + (!hw->dev_spec._82575.eee_disable)) + adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; + + return 0; +} + +void igb_down(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 tctl, rctl; + int i; + + /* signal that we're down so the interrupt handler does not + * reschedule our watchdog timer */ + set_bit(__IGB_DOWN, &adapter->state); + + /* disable receives in the hardware */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + /* flush and sleep below */ + + netif_tx_stop_all_queues(netdev); + + /* disable transmits in the hardware */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_EN; + E1000_WRITE_REG(hw, E1000_TCTL, tctl); + /* flush both disables and wait for them to finish */ + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_disable(&(adapter->q_vector[i]->napi)); + + igb_irq_disable(adapter); + + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + + del_timer_sync(&adapter->watchdog_timer); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + del_timer_sync(&adapter->dma_err_timer); + del_timer_sync(&adapter->phy_info_timer); + + netif_carrier_off(netdev); + + /* record the stats before reset*/ + igb_update_stats(adapter); + + adapter->link_speed = 0; + adapter->link_duplex = 0; + +#ifdef HAVE_PCI_ERS + if (!pci_channel_offline(adapter->pdev)) + igb_reset(adapter); +#else + igb_reset(adapter); +#endif + igb_clean_all_tx_rings(adapter); + igb_clean_all_rx_rings(adapter); +#ifdef IGB_DCA + /* since we reset the hardware DCA settings were cleared */ + igb_setup_dca(adapter); +#endif +} + +void igb_reinit_locked(struct igb_adapter *adapter) +{ + WARN_ON(in_interrupt()); + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + igb_down(adapter); + igb_up(adapter); + clear_bit(__IGB_RESETTING, &adapter->state); +} + +/** + * igb_enable_mas - Media Autosense re-enable after swap + * + * @adapter: adapter struct + **/ +static s32 igb_enable_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 connsw; + s32 ret_val = E1000_SUCCESS; + + connsw = E1000_READ_REG(hw, E1000_CONNSW); + if (hw->phy.media_type == e1000_media_type_copper) { + /* configure for SerDes media detect */ + if (!(connsw & E1000_CONNSW_SERDESD)) { + connsw |= E1000_CONNSW_ENRGSRC; + connsw |= E1000_CONNSW_AUTOSENSE_EN; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + E1000_WRITE_FLUSH(hw); + } else if (connsw & E1000_CONNSW_SERDESD) { + /* already SerDes, no need to enable anything */ + return ret_val; + } else { + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: Unable to configure feature, disabling..\n", + adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_MAS_ENABLE; + } + } + return ret_val; +} + +void igb_reset(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + struct e1000_mac_info *mac = &hw->mac; + struct e1000_fc_info *fc = &hw->fc; + u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + + /* Repartition Pba for greater than 9k mtu + * To take effect CTRL.RST is required. + */ + switch (mac->type) { + case e1000_i350: + case e1000_82580: + case e1000_i354: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba = e1000_rxpbs_adjust_82580(pba); + break; + case e1000_82576: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba &= E1000_RXPBS_SIZE_MASK_82576; + break; + case e1000_82575: + case e1000_i210: + case e1000_i211: + default: + pba = E1000_PBA_34K; + break; + } + + if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && + (mac->type < e1000_82576)) { + /* adjust PBA for jumbo frames */ + E1000_WRITE_REG(hw, E1000_PBA, pba); + + /* To maintain wire speed transmits, the Tx FIFO should be + * large enough to accommodate two full transmit packets, + * rounded up to the next 1KB and expressed in KB. Likewise, + * the Rx FIFO should be large enough to accommodate at least + * one full receive packet and is similarly rounded up and + * expressed in KB. */ + pba = E1000_READ_REG(hw, E1000_PBA); + /* upper 16 bits has Tx packet buffer allocation size in KB */ + tx_space = pba >> 16; + /* lower 16 bits has Rx packet buffer allocation size in KB */ + pba &= 0xffff; + /* the tx fifo also stores 16 bytes of information about the tx + * but don't include ethernet FCS because hardware appends it */ + min_tx_space = (adapter->max_frame_size + + sizeof(union e1000_adv_tx_desc) - + ETH_FCS_LEN) * 2; + min_tx_space = ALIGN(min_tx_space, 1024); + min_tx_space >>= 10; + /* software strips receive CRC, so leave room for it */ + min_rx_space = adapter->max_frame_size; + min_rx_space = ALIGN(min_rx_space, 1024); + min_rx_space >>= 10; + + /* If current Tx allocation is less than the min Tx FIFO size, + * and the min Tx FIFO size is less than the current Rx FIFO + * allocation, take space away from current Rx allocation */ + if (tx_space < min_tx_space && + ((min_tx_space - tx_space) < pba)) { + pba = pba - (min_tx_space - tx_space); + + /* if short on rx space, rx wins and must trump tx + * adjustment */ + if (pba < min_rx_space) + pba = min_rx_space; + } + E1000_WRITE_REG(hw, E1000_PBA, pba); + } + + /* flow control settings */ + /* The high water mark must be low enough to fit one full frame + * (or the size used for early receive) above it in the Rx FIFO. + * Set it to the lower of: + * - 90% of the Rx FIFO size, or + * - the full Rx FIFO size minus one full frame */ + hwm = min(((pba << 10) * 9 / 10), + ((pba << 10) - 2 * adapter->max_frame_size)); + + fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + fc->pause_time = 0xFFFF; + fc->send_xon = 1; + fc->current_mode = fc->requested_mode; + + /* disable receive for all VFs and wait one second */ + if (adapter->vfs_allocated_count) { + int i; + /* + * Clear all flags except indication that the PF has set + * the VF MAC addresses administratively + */ + for (i = 0 ; i < adapter->vfs_allocated_count; i++) + adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; + + /* ping all the active vfs to let them know we are going down */ + igb_ping_all_vfs(adapter); + + /* disable transmits and receives */ + E1000_WRITE_REG(hw, E1000_VFRE, 0); + E1000_WRITE_REG(hw, E1000_VFTE, 0); + } + + /* Allow time for pending master requests to run */ + e1000_reset_hw(hw); + E1000_WRITE_REG(hw, E1000_WUC, 0); + + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + e1000_setup_init_funcs(hw, TRUE); + igb_check_options(adapter); + e1000_get_bus_info(hw); + adapter->flags &= ~IGB_FLAG_MEDIA_RESET; + } + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (igb_enable_mas(adapter)) + dev_err(pci_dev_to_dev(pdev), + "Error enabling Media Auto Sense\n"); + } + if (e1000_init_hw(hw)) + dev_err(pci_dev_to_dev(pdev), "Hardware Error\n"); + + /* + * Flow control settings reset on hardware reset, so guarantee flow + * control is off when forcing speed. + */ + if (!hw->mac.autoneg) + e1000_force_mac_fc(hw); + + igb_init_dmac(adapter, pba); + /* Re-initialize the thermal sensor on i350 devices. */ + if (mac->type == e1000_i350 && hw->bus.func == 0) { + /* + * If present, re-initialize the external thermal sensor + * interface. + */ + if (adapter->ets) + e1000_set_i2c_bb(hw); + e1000_init_thermal_sensor_thresh(hw); + } + + /*Re-establish EEE setting */ + if (hw->phy.media_type == e1000_media_type_copper) { + switch (mac->type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + e1000_set_eee_i350(hw); + break; + case e1000_i354: + e1000_set_eee_i354(hw); + break; + default: + break; + } + } + + if (!netif_running(adapter->netdev)) + igb_power_down_link(adapter); + + igb_update_mng_vlan(adapter); + + /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ + E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE); + + +#ifdef HAVE_PTP_1588_CLOCK + /* Re-enable PTP, where applicable. */ + igb_ptp_reset(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + e1000_get_phy_info(hw); + + adapter->devrc++; +} + +#ifdef HAVE_NDO_SET_FEATURES +static kni_netdev_features_t igb_fix_features(struct net_device *netdev, + kni_netdev_features_t features) +{ + /* + * Since there is no support for separate tx vlan accel + * enabled make sure tx flag is cleared if rx is. + */ +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) + features &= ~NETIF_F_HW_VLAN_CTAG_TX; +#else + if (!(features & NETIF_F_HW_VLAN_RX)) + features &= ~NETIF_F_HW_VLAN_TX; +#endif + + /* If Rx checksum is disabled, then LRO should also be disabled */ + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_LRO; + + return features; +} + +static int igb_set_features(struct net_device *netdev, + kni_netdev_features_t features) +{ + u32 changed = netdev->features ^ features; + +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if (changed & NETIF_F_HW_VLAN_CTAG_RX) +#else + if (changed & NETIF_F_HW_VLAN_RX) +#endif + igb_vlan_mode(netdev, features); + + return 0; +} + +#ifdef NTF_SELF +#ifdef USE_CONST_DEV_UC_CHAR +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, +#ifdef HAVE_NDO_FDB_ADD_VID + u16 vid, +#endif + u16 flags) +#else +static int igb_ndo_fdb_add(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr, + u16 flags) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + int err; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + /* Hardware does not support aging addresses so if a + * ndm_state is given only allow permanent addresses + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", + igb_driver_name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + u32 rar_uc_entries = hw->mac.rar_entry_count - + (adapter->vfs_allocated_count + 1); + + if (netdev_uc_count(dev) < rar_uc_entries) + err = dev_uc_add_excl(dev, addr); + else + err = -ENOMEM; + } else if (is_multicast_ether_addr(addr)) { + err = dev_mc_add_excl(dev, addr); + } else { + err = -EINVAL; + } + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +#ifndef USE_DEFAULT_FDB_DEL_DUMP +#ifdef USE_CONST_DEV_UC_CHAR +static int igb_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + const unsigned char *addr) +#else +static int igb_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + int err = -EOPNOTSUPP; + + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", + igb_driver_name); + return -EINVAL; + } + + if (adapter->vfs_allocated_count) { + if (is_unicast_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + } + + return err; +} + +static int igb_ndo_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int idx) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + if (adapter->vfs_allocated_count) + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + + return idx; +} +#endif /* USE_DEFAULT_FDB_DEL_DUMP */ + +#ifdef HAVE_BRIDGE_ATTRIBS +#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS +static int igb_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +#else +static int igb_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh) +#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */ +{ + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + struct nlattr *attr, *br_spec; + int rem; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_i350: + case e1000_i354: + break; + default: + return -EOPNOTSUPP; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) { + e1000_vmdq_set_loopback_pf(hw, 0); + adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE; + } else if (mode == BRIDGE_MODE_VEB) { + e1000_vmdq_set_loopback_pf(hw, 1); + adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; + } else + return -EINVAL; + + netdev_info(adapter->netdev, "enabling bridge mode: %s\n", + mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); + } + + return 0; +} + +#ifdef HAVE_BRIDGE_FILTER +#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +#else +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask) +#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ +#else +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + u16 mode; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE) + mode = BRIDGE_MODE_VEB; + else + mode = BRIDGE_MODE_VEPA; + +#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK +#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, + nlflags, filter_mask, NULL); +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags); +#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */ +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0); +#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode); +#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */ +} +#endif /* HAVE_BRIDGE_ATTRIBS */ +#endif /* NTF_SELF */ + +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef HAVE_NET_DEVICE_OPS +static const struct net_device_ops igb_netdev_ops = { + .ndo_open = igb_open, + .ndo_stop = igb_close, + .ndo_start_xmit = igb_xmit_frame, + .ndo_get_stats = igb_get_stats, + .ndo_set_rx_mode = igb_set_rx_mode, + .ndo_set_mac_address = igb_set_mac, + .ndo_change_mtu = igb_change_mtu, + .ndo_do_ioctl = igb_ioctl, + .ndo_tx_timeout = igb_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, +#ifdef IFLA_VF_MAX + .ndo_set_vf_mac = igb_ndo_set_vf_mac, + .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, +#ifdef HAVE_VF_MIN_MAX_TXRATE + .ndo_set_vf_rate = igb_ndo_set_vf_bw, +#else /* HAVE_VF_MIN_MAX_TXRATE */ + .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + .ndo_get_vf_config = igb_ndo_get_vf_config, +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, +#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ +#endif /* IFLA_VF_MAX */ +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = igb_netpoll, +#endif +#ifdef HAVE_NDO_SET_FEATURES + .ndo_fix_features = igb_fix_features, + .ndo_set_features = igb_set_features, +#endif +#ifdef HAVE_VLAN_RX_REGISTER + .ndo_vlan_rx_register = igb_vlan_mode, +#endif +#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB +#ifdef NTF_SELF + .ndo_fdb_add = igb_ndo_fdb_add, +#ifndef USE_DEFAULT_FDB_DEL_DUMP + .ndo_fdb_del = igb_ndo_fdb_del, + .ndo_fdb_dump = igb_ndo_fdb_dump, +#endif +#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */ +#ifdef HAVE_BRIDGE_ATTRIBS + .ndo_bridge_setlink = igb_ndo_bridge_setlink, + .ndo_bridge_getlink = igb_ndo_bridge_getlink, +#endif /* HAVE_BRIDGE_ATTRIBS */ +#endif +}; + +#ifdef CONFIG_IGB_VMDQ_NETDEV +static const struct net_device_ops igb_vmdq_ops = { + .ndo_open = &igb_vmdq_open, + .ndo_stop = &igb_vmdq_close, + .ndo_start_xmit = &igb_vmdq_xmit_frame, + .ndo_get_stats = &igb_vmdq_get_stats, + .ndo_set_rx_mode = &igb_vmdq_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = &igb_vmdq_set_mac, + .ndo_change_mtu = &igb_vmdq_change_mtu, + .ndo_tx_timeout = &igb_vmdq_tx_timeout, + .ndo_vlan_rx_register = &igb_vmdq_vlan_rx_register, + .ndo_vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid, +}; + +#endif /* CONFIG_IGB_VMDQ_NETDEV */ +#endif /* HAVE_NET_DEVICE_OPS */ +#ifdef CONFIG_IGB_VMDQ_NETDEV +void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev) +{ +#ifdef HAVE_NET_DEVICE_OPS + vnetdev->netdev_ops = &igb_vmdq_ops; +#else + dev->open = &igb_vmdq_open; + dev->stop = &igb_vmdq_close; + dev->hard_start_xmit = &igb_vmdq_xmit_frame; + dev->get_stats = &igb_vmdq_get_stats; +#ifdef HAVE_SET_RX_MODE + dev->set_rx_mode = &igb_vmdq_set_rx_mode; +#endif + dev->set_multicast_list = &igb_vmdq_set_rx_mode; + dev->set_mac_address = &igb_vmdq_set_mac; + dev->change_mtu = &igb_vmdq_change_mtu; +#ifdef HAVE_TX_TIMEOUT + dev->tx_timeout = &igb_vmdq_tx_timeout; +#endif +#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX) + dev->vlan_rx_register = &igb_vmdq_vlan_rx_register; + dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid; +#endif +#endif + igb_vmdq_set_ethtool_ops(vnetdev); + vnetdev->watchdog_timeo = 5 * HZ; + +} + +int igb_init_vmdq_netdevs(struct igb_adapter *adapter) +{ + int pool, err = 0, base_queue; + struct net_device *vnetdev; + struct igb_vmdq_adapter *vmdq_adapter; + + for (pool = 1; pool < adapter->vmdq_pools; pool++) { + int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues); + base_queue = pool * qpp; + vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter)); + if (!vnetdev) { + err = -ENOMEM; + break; + } + vmdq_adapter = netdev_priv(vnetdev); + vmdq_adapter->vnetdev = vnetdev; + vmdq_adapter->real_adapter = adapter; + vmdq_adapter->rx_ring = adapter->rx_ring[base_queue]; + vmdq_adapter->tx_ring = adapter->tx_ring[base_queue]; + igb_assign_vmdq_netdev_ops(vnetdev); + snprintf(vnetdev->name, IFNAMSIZ, "%sv%d", + adapter->netdev->name, pool); + vnetdev->features = adapter->netdev->features; +#ifdef HAVE_NETDEV_VLAN_FEATURES + vnetdev->vlan_features = adapter->netdev->vlan_features; +#endif + adapter->vmdq_netdev[pool-1] = vnetdev; + err = register_netdev(vnetdev); + if (err) + break; + } + return err; +} + +int igb_remove_vmdq_netdevs(struct igb_adapter *adapter) +{ + int pool, err = 0; + + for (pool = 1; pool < adapter->vmdq_pools; pool++) { + unregister_netdev(adapter->vmdq_netdev[pool-1]); + free_netdev(adapter->vmdq_netdev[pool-1]); + adapter->vmdq_netdev[pool-1] = NULL; + } + return err; +} +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + +/** + * igb_set_fw_version - Configure version string for ethtool + * @adapter: adapter struct + * + **/ +static void igb_set_fw_version(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_fw_version fw; + + e1000_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (!(e1000_get_flash_presence_i210(hw))) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, fw.invm_img_type); + break; + } + /* fall through */ + default: + /* if option rom is valid, display its version too*/ + if (fw.or_valid) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + if (fw.etrack_id != 0X0000) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, fw.etrack_id); + } else { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d.%d", + fw.eep_major, fw.eep_minor, fw.eep_build); + } + } + break; + } + + return; +} + +/** + * igb_init_mas - init Media Autosense feature if enabled in the NVM + * + * @adapter: adapter struct + **/ +static void igb_init_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 eeprom_data; + + e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data); + switch (hw->bus.func) { + case E1000_FUNC_0: + if (eeprom_data & IGB_MAS_ENABLE_0) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_1: + if (eeprom_data & IGB_MAS_ENABLE_1) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_2: + if (eeprom_data & IGB_MAS_ENABLE_2) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_3: + if (eeprom_data & IGB_MAS_ENABLE_3) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + default: + /* Shouldn't get here */ + dev_err(pci_dev_to_dev(adapter->pdev), + "%s:AMS: Invalid port configuration, returning\n", + adapter->netdev->name); + break; + } +} + +/** + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +static int __devinit igb_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *netdev; + struct igb_adapter *adapter; + struct e1000_hw *hw; + u16 eeprom_data = 0; + u8 pba_str[E1000_PBANUM_LENGTH]; + s32 ret_val; + static int global_quad_port_a; /* global quad port a indication */ + int i, err, pci_using_dac; + static int cards_found; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + pci_using_dac = 0; + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + IGB_ERR("No usable DMA configuration, " + "aborting\n"); + goto err_dma; + } + } + } + +#ifndef HAVE_ASPM_QUIRKS + /* 82575 requires that the pci-e link partner disable the L0s state */ + switch (pdev->device) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + default: + break; + } + +#endif /* HAVE_ASPM_QUIRKS */ + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igb_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; +#ifdef HAVE_TX_MQ + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_MAX_TX_QUEUES); +#else + netdev = alloc_etherdev(sizeof(struct igb_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) + goto err_alloc_etherdev; + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = (1 << debug) - 1; + +#ifdef HAVE_PCI_ERS + err = pci_save_state(pdev); + if (err) + goto err_ioremap; +#endif + err = -EIO; + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) + goto err_ioremap; + +#ifdef HAVE_NET_DEVICE_OPS + netdev->netdev_ops = &igb_netdev_ops; +#else /* HAVE_NET_DEVICE_OPS */ + netdev->open = &igb_open; + netdev->stop = &igb_close; + netdev->get_stats = &igb_get_stats; +#ifdef HAVE_SET_RX_MODE + netdev->set_rx_mode = &igb_set_rx_mode; +#endif + netdev->set_multicast_list = &igb_set_rx_mode; + netdev->set_mac_address = &igb_set_mac; + netdev->change_mtu = &igb_change_mtu; + netdev->do_ioctl = &igb_ioctl; +#ifdef HAVE_TX_TIMEOUT + netdev->tx_timeout = &igb_tx_timeout; +#endif + netdev->vlan_rx_register = igb_vlan_mode; + netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; + netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = igb_netpoll; +#endif + netdev->hard_start_xmit = &igb_xmit_frame; +#endif /* HAVE_NET_DEVICE_OPS */ + igb_set_ethtool_ops(netdev); +#ifdef HAVE_TX_TIMEOUT + netdev->watchdog_timeo = 5 * HZ; +#endif + + strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = igb_sw_init(adapter); + if (err) + goto err_sw_init; + + e1000_get_bus_info(hw); + + hw->phy.autoneg_wait_to_complete = FALSE; + hw->mac.adaptive_ifs = FALSE; + + /* Copper options */ + if (hw->phy.media_type == e1000_media_type_copper) { + hw->phy.mdix = AUTO_ALL_MODES; + hw->phy.disable_polarity_correction = FALSE; + hw->phy.ms_type = e1000_ms_hw_default; + } + + if (e1000_check_reset_block(hw)) + dev_info(pci_dev_to_dev(pdev), + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* + * features is initialized to 0 in allocation, it might have bits + * set by igb_sw_init so we should use an or instead of an + * assignment. + */ + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM | +#ifdef NETIF_F_IPV6_CSUM + NETIF_F_IPV6_CSUM | +#endif +#ifdef NETIF_F_TSO + NETIF_F_TSO | +#ifdef NETIF_F_TSO6 + NETIF_F_TSO6 | +#endif +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_RXHASH + NETIF_F_RXHASH | +#endif + NETIF_F_RXCSUM | +#ifdef NETIF_F_HW_VLAN_CTAG_RX + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; +#else + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_TX; +#endif + + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CSUM; + +#ifdef HAVE_NDO_SET_FEATURES + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; +#ifndef IGB_NO_LRO + + /* give us the option of enabling LRO later */ + netdev->hw_features |= NETIF_F_LRO; +#endif +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif +#endif + + /* set this bit last since it cannot be part of hw_features */ +#ifdef NETIF_F_HW_VLAN_CTAG_FILTER + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +#else + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + +#endif + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); +#ifdef DEBUG + if (adapter->dmac != IGB_DMAC_DISABLE) + printk("%s: DMA Coalescing is enabled..\n", netdev->name); +#endif + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state */ + e1000_reset_hw(hw); + + /* make sure the NVM is good */ + if (e1000_validate_nvm_checksum(hw) < 0) { + dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" + " Valid\n"); + err = -EIO; + goto err_eeprom; + } + + /* copy the MAC address out of the NVM */ + if (e1000_read_mac_addr(hw)) + dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->perm_addr)) { +#else + if (!is_valid_ether_addr(netdev->dev_addr)) { +#endif + dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); + adapter->mac_table[0].queue = adapter->vfs_allocated_count; + adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); + igb_rar_set(adapter, 0); + + /* get firmware version for ethtool -i */ + igb_set_fw_version(adapter); + + /* Check if Media Autosense is enabled */ + if (hw->mac.type == e1000_82580) + igb_init_mas(adapter); + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); + + INIT_WORK(&adapter->reset_task, igb_reset_task); + INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); + + /* Initialize link properties that are user-changeable */ + adapter->fc_autoneg = true; + hw->mac.autoneg = true; + hw->phy.autoneg_advertised = 0x2f; + + hw->fc.requested_mode = e1000_fc_default; + hw->fc.current_mode = e1000_fc_default; + + e1000_validate_mdi_setting(hw); + + /* By default, support wake on port A */ + if (hw->bus.func == 0) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* Check the NVM for wake support for non-port A ports */ + if (hw->mac.type >= e1000_82580) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); + else if (hw->bus.func == 1) + e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + + if (eeprom_data & IGB_EEPROM_APME) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* now that we have the eeprom settings, apply the special cases where + * the eeprom may be wrong or the board simply won't support wake on + * lan on a particular port */ + switch (pdev->device) { + case E1000_DEV_ID_82575GB_QUAD_COPPER: + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + default: + /* If the device can't wake, don't set software support */ + if (!device_can_wakeup(&adapter->pdev->dev)) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + } + + /* initialize the wol settings based on the eeprom settings */ + if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) + adapter->wol |= E1000_WUFC_MAG; + + /* Some vendors want WoL disabled by default, but still supported */ + if ((hw->mac.type == e1000_i350) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + + device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), + adapter->flags & IGB_FLAG_WOL_SUPPORTED); + + /* reset the hardware with the new settings */ + igb_reset(adapter); + adapter->devrc = 0; + +#ifdef HAVE_I2C_SUPPORT + /* Init the I2C interface */ + err = igb_init_i2c(adapter); + if (err) { + dev_err(&pdev->dev, "failed to init i2c interface\n"); + goto err_eeprom; + } +#endif /* HAVE_I2C_SUPPORT */ + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + strncpy(netdev->name, "eth%d", IFNAMSIZ); + err = register_netdev(netdev); + if (err) + goto err_register; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + err = igb_init_vmdq_netdevs(adapter); + if (err) + goto err_register; +#endif + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + +#ifdef IGB_DCA + if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + } + +#endif +#ifdef HAVE_PTP_1588_CLOCK + /* do hw tstamp init after resetting */ + igb_ptp_init(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); + /* print bus type/speed/width info */ + dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown")); + dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); + for (i = 0; i < 6; i++) + printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); + if (ret_val) + strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); + dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, + pba_str); + + + /* Initialize the thermal sensor on i350 devices. */ + if (hw->mac.type == e1000_i350) { + if (hw->bus.func == 0) { + u16 ets_word; + + /* + * Read the NVM to determine if this i350 device + * supports an external thermal sensor. + */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); + if (ets_word != 0x0000 && ets_word != 0xFFFF) + adapter->ets = true; + else + adapter->ets = false; + } +#ifdef IGB_HWMON + + igb_sysfs_init(adapter); +#else +#ifdef IGB_PROCFS + + igb_procfs_init(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + } else { + adapter->ets = false; + } + + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = e1000_set_eee_i350(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + break; + case e1000_i354: + if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & + (E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = e1000_set_eee_i354(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + } + break; + default: + break; + } + } + + /* send driver version info to firmware */ + if (hw->mac.type >= e1000_i350) + igb_init_fw(adapter); + +#ifndef IGB_NO_LRO + if (netdev->features & NETIF_F_LRO) + dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); + else + dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); +#endif + dev_info(pci_dev_to_dev(pdev), + "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", + adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", + adapter->num_rx_queues, adapter->num_tx_queues); + + cards_found++; + + pm_runtime_put_noidle(&pdev->dev); + return 0; + +err_register: + igb_release_hw_control(adapter); +#ifdef HAVE_I2C_SUPPORT + memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); +#endif /* HAVE_I2C_SUPPORT */ +err_eeprom: + if (!e1000_check_reset_block(hw)) + e1000_phy_hw_reset(hw); + + if (hw->flash_address) + iounmap(hw->flash_address); +err_sw_init: + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: + pci_release_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM)); +err_pci_reg: +err_dma: + pci_disable_device(pdev); + return err; +} +#ifdef HAVE_I2C_SUPPORT +/* + * igb_remove_i2c - Cleanup I2C interface + * @adapter: pointer to adapter structure + * + */ +static void igb_remove_i2c(struct igb_adapter *adapter) +{ + + /* free the adapter bus structure */ + i2c_del_adapter(&adapter->i2c_adap); +} +#endif /* HAVE_I2C_SUPPORT */ + +/** + * igb_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * igb_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. + **/ +static void __devexit igb_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + pm_runtime_get_noresume(&pdev->dev); +#ifdef HAVE_I2C_SUPPORT + igb_remove_i2c(adapter); +#endif /* HAVE_I2C_SUPPORT */ +#ifdef HAVE_PTP_1588_CLOCK + igb_ptp_stop(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + /* flush_scheduled work may reschedule our watchdog task, so + * explicitly disable watchdog tasks from being rescheduled */ + set_bit(__IGB_DOWN, &adapter->state); + del_timer_sync(&adapter->watchdog_timer); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + del_timer_sync(&adapter->dma_err_timer); + del_timer_sync(&adapter->phy_info_timer); + + flush_scheduled_work(); + +#ifdef IGB_DCA + if (adapter->flags & IGB_FLAG_DCA_ENABLED) { + dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); + dca_remove_requester(&pdev->dev); + adapter->flags &= ~IGB_FLAG_DCA_ENABLED; + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); + } +#endif + + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + igb_release_hw_control(adapter); + + unregister_netdev(netdev); +#ifdef CONFIG_IGB_VMDQ_NETDEV + igb_remove_vmdq_netdevs(adapter); +#endif + + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + + iounmap(hw->hw_addr); + if (hw->flash_address) + iounmap(hw->flash_address); + pci_release_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM)); + +#ifdef IGB_HWMON + igb_sysfs_exit(adapter); +#else +#ifdef IGB_PROCFS + igb_procfs_exit(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + kfree(adapter->mac_table); + kfree(adapter->shadow_vfta); + free_netdev(netdev); + + pci_disable_pcie_error_reporting(pdev); + + pci_disable_device(pdev); +} + +/** + * igb_sw_init - Initialize general software structures (struct igb_adapter) + * @adapter: board private structure to initialize + * + * igb_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + **/ +static int igb_sw_init(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + + /* PCI config space info */ + + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGB_DEFAULT_TXD; + adapter->rx_ring_count = IGB_DEFAULT_RXD; + + /* set default work limits */ + adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; + + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + + /* Initialize the hardware-specific values */ + if (e1000_setup_init_funcs(hw, TRUE)) { + dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n"); + return -EIO; + } + + adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) * + hw->mac.rar_entry_count, + GFP_ATOMIC); + + /* Setup and initialize a copy of the hw vlan table array */ + adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES, + GFP_ATOMIC); +#ifdef NO_KNI + /* These calls may decrease the number of queues */ + if (hw->mac.type < e1000_i210) { + igb_set_sriov_capability(adapter); + } + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igb_irq_disable(adapter); + + set_bit(__IGB_DOWN, &adapter->state); +#endif + return 0; +} + +/** + * igb_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + **/ +static int __igb_open(struct net_device *netdev, bool resuming) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +#ifdef CONFIG_PM_RUNTIME + struct pci_dev *pdev = adapter->pdev; +#endif /* CONFIG_PM_RUNTIME */ + int err; + int i; + + /* disallow open during test */ + if (test_bit(__IGB_TESTING, &adapter->state)) { + WARN_ON(resuming); + return -EBUSY; + } + +#ifdef CONFIG_PM_RUNTIME + if (!resuming) + pm_runtime_get_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + netif_carrier_off(netdev); + + /* allocate transmit descriptors */ + err = igb_setup_all_tx_resources(adapter); + if (err) + goto err_setup_tx; + + /* allocate receive descriptors */ + err = igb_setup_all_rx_resources(adapter); + if (err) + goto err_setup_rx; + + igb_power_up_link(adapter); + + /* before we allocate an interrupt, we must be ready to handle it. + * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt + * as soon as we call pci_request_irq, so we have to setup our + * clean_rx handler before we do so. */ + igb_configure(adapter); + + err = igb_request_irq(adapter); + if (err) + goto err_req_irq; + + /* Notify the stack of the actual queue counts. */ + netif_set_real_num_tx_queues(netdev, + adapter->vmdq_pools ? 1 : + adapter->num_tx_queues); + + err = netif_set_real_num_rx_queues(netdev, + adapter->vmdq_pools ? 1 : + adapter->num_rx_queues); + if (err) + goto err_set_queues; + + /* From here on the code is the same as igb_up() */ + clear_bit(__IGB_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&(adapter->q_vector[i]->napi)); + igb_configure_lli(adapter); + + /* Clear any pending interrupts. */ + E1000_READ_REG(hw, E1000_ICR); + + igb_irq_enable(adapter); + + /* notify VFs that reset has been completed */ + if (adapter->vfs_allocated_count) { + u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); + } + + netif_tx_start_all_queues(netdev); + + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + schedule_work(&adapter->dma_err_task); + + /* start the watchdog. */ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + return E1000_SUCCESS; + +err_set_queues: + igb_free_irq(adapter); +err_req_irq: + igb_release_hw_control(adapter); + igb_power_down_link(adapter); + igb_free_all_rx_resources(adapter); +err_setup_rx: + igb_free_all_tx_resources(adapter); +err_setup_tx: + igb_reset(adapter); + +#ifdef CONFIG_PM_RUNTIME + if (!resuming) + pm_runtime_put(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + return err; +} + +static int igb_open(struct net_device *netdev) +{ + return __igb_open(netdev, false); +} + +/** + * igb_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + **/ +static int __igb_close(struct net_device *netdev, bool suspending) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM_RUNTIME + struct pci_dev *pdev = adapter->pdev; +#endif /* CONFIG_PM_RUNTIME */ + + WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); + +#ifdef CONFIG_PM_RUNTIME + if (!suspending) + pm_runtime_get_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + igb_down(adapter); + + igb_release_hw_control(adapter); + + igb_free_irq(adapter); + + igb_free_all_tx_resources(adapter); + igb_free_all_rx_resources(adapter); + +#ifdef CONFIG_PM_RUNTIME + if (!suspending) + pm_runtime_put_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + return 0; +} + +static int igb_close(struct net_device *netdev) +{ + return __igb_close(netdev, false); +} + +/** + * igb_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + **/ +int igb_setup_tx_resources(struct igb_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int size; + + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + + if (!tx_ring->desc) + goto err; + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + dev_err(dev, + "Unable to allocate memory for the transmit descriptor ring\n"); + return -ENOMEM; +} + +/** + * igb_setup_all_tx_resources - wrapper to allocate Tx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + **/ +static int igb_setup_all_tx_resources(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = igb_setup_tx_resources(adapter->tx_ring[i]); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "Allocation for Tx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igb_free_tx_resources(adapter->tx_ring[i]); + break; + } + } + + return err; +} + +/** + * igb_setup_tctl - configure the transmit control registers + * @adapter: Board private structure + **/ +void igb_setup_tctl(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 tctl; + + /* disable queue 0 which is enabled by default on 82575 and 82576 */ + E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0); + + /* Program the Transmit Control Register */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_CT; + tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | + (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); + + e1000_config_collision_dist(hw); + + /* Enable transmits */ + tctl |= E1000_TCTL_EN; + + E1000_WRITE_REG(hw, E1000_TCTL, tctl); +} + +static u32 igb_tx_wthresh(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + switch (hw->mac.type) { + case e1000_i354: + return 4; + case e1000_82576: + if (adapter->msix_entries) + return 1; + default: + break; + } + + return 16; +} + +/** + * igb_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure + * + * Configure a transmit ring after a reset. + **/ +void igb_configure_tx_ring(struct igb_adapter *adapter, + struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txdctl = 0; + u64 tdba = ring->dma; + int reg_idx = ring->reg_idx; + + /* disable the queue */ + E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0); + E1000_WRITE_FLUSH(hw); + mdelay(10); + + E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx), + ring->count * sizeof(union e1000_adv_tx_desc)); + E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx), + tdba & 0x00000000ffffffffULL); + E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32); + + ring->tail = hw->hw_addr + E1000_TDT(reg_idx); + E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0); + writel(0, ring->tail); + + txdctl |= IGB_TX_PTHRESH; + txdctl |= IGB_TX_HTHRESH << 8; + txdctl |= igb_tx_wthresh(adapter) << 16; + + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl); +} + +/** + * igb_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset. + **/ +static void igb_configure_tx(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_configure_tx_ring(adapter, adapter->tx_ring[i]); +} + +/** + * igb_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int igb_setup_rx_resources(struct igb_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int size, desc_len; + + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + desc_len = sizeof(union e1000_adv_rx_desc); + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = ALIGN(rx_ring->size, 4096); + + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + + if (!rx_ring->desc) + goto err; + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + + return 0; + +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the receive descriptor" + " ring\n"); + return -ENOMEM; +} + +/** + * igb_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + **/ +static int igb_setup_all_rx_resources(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = igb_setup_rx_resources(adapter->rx_ring[i]); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "Allocation for Rx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igb_free_rx_resources(adapter->rx_ring[i]); + break; + } + } + + return err; +} + +/** + * igb_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure + **/ +static void igb_setup_mrqc(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 mrqc, rxcsum; + u32 j, num_rx_queues, shift = 0, shift2 = 0; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; + + /* Fill out hash function seeds */ + for (j = 0; j < 10; j++) + E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]); + + num_rx_queues = adapter->rss_queues; + + /* 82575 and 82576 supports 2 RSS queues for VMDq */ + switch (hw->mac.type) { + case e1000_82575: + if (adapter->vmdq_pools) { + shift = 2; + shift2 = 6; + break; + } + shift = 6; + break; + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) { + shift = 3; + num_rx_queues = 2; + } + break; + default: + break; + } + + /* + * Populate the redirection table 4 entries at a time. To do this + * we are generating the results for n and n+2 and then interleaving + * those with the results with n+1 and n+3. + */ + for (j = 0; j < 32; j++) { + /* first pass generates n and n+2 */ + u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; + u32 reta = (base & 0x07800780) >> (7 - shift); + + /* second pass generates n+1 and n+3 */ + base += 0x00010001 * num_rx_queues; + reta |= (base & 0x07800780) << (1 + shift); + + /* generate 2nd table for 82575 based parts */ + if (shift2) + reta |= (0x01010101 * num_rx_queues) << shift2; + + E1000_WRITE_REG(hw, E1000_RETA(j), reta); + } + + /* + * Disable raw packet checksumming so that RSS hash is placed in + * descriptor on writeback. No need to enable TCP/UDP/IP checksum + * offloads as they are enabled by default + */ + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + rxcsum |= E1000_RXCSUM_PCSD; + + if (adapter->hw.mac.type >= e1000_82576) + /* Enable Receive Checksum Offload for SCTP */ + rxcsum |= E1000_RXCSUM_CRCOFL; + + /* Don't need to set TUOFL or IPOFL, they default to 1 */ + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); + + /* Generate RSS hash based on packet types, TCP/UDP + * port numbers and/or IPv4/v6 src and dst addresses + */ + mrqc = E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; + + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; + + /* If VMDq is enabled then we set the appropriate mode for that, else + * we default to RSS so that an RSS hash is calculated per packet even + * if we are only using one queue */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) { + if (hw->mac.type > e1000_82575) { + /* Set the default pool for the PF's first queue */ + u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | + E1000_VT_CTL_DISABLE_DEF_POOL); + vtctl |= adapter->vfs_allocated_count << + E1000_VT_CTL_DEFAULT_POOL_SHIFT; + E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl); + } else if (adapter->rss_queues > 1) { + /* set default queue for pool 1 to queue 2 */ + E1000_WRITE_REG(hw, E1000_VT_CTL, + adapter->rss_queues << 7); + } + if (adapter->rss_queues > 1) + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + else + mrqc |= E1000_MRQC_ENABLE_VMDQ; + } else { + mrqc |= E1000_MRQC_ENABLE_RSS_4Q; + } + igb_vmm_control(adapter); + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); +} + +/** + * igb_setup_rctl - configure the receive control registers + * @adapter: Board private structure + **/ +void igb_setup_rctl(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rctl; + + rctl = E1000_READ_REG(hw, E1000_RCTL); + + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); + + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + + /* + * enable stripping of CRC. It's unlikely this will break BMC + * redirection as it did with e1000. Newer features require + * that the HW strips the CRC. + */ + rctl |= E1000_RCTL_SECRC; + + /* disable store bad packets and clear size bits. */ + rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); + + /* enable LPE to prevent packets larger than max_frame_size */ + rctl |= E1000_RCTL_LPE; + + /* disable queue 0 to prevent tail write w/o re-config */ + E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0); + + /* Attention!!! For SR-IOV PF driver operations you must enable + * queue drop for all VF and PF queues to prevent head of line blocking + * if an un-trusted VF does not provide descriptors to hardware. + */ + if (adapter->vfs_allocated_count) { + /* set all queue drop enable bits */ + E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES); + } + + E1000_WRITE_REG(hw, E1000_RCTL, rctl); +} + +static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, + int vfn) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + /* if it isn't the PF check to see if VFs are enabled and + * increase the size to support vlan tags */ + if (vfn < adapter->vfs_allocated_count && + adapter->vf_data[vfn].vlans_enabled) + size += VLAN_HLEN; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (vfn >= adapter->vfs_allocated_count) { + int queue = vfn - adapter->vfs_allocated_count; + struct igb_vmdq_adapter *vadapter; + + vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]); + if (vadapter->vlgrp) + size += VLAN_HLEN; + } +#endif + vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= size | E1000_VMOLR_LPE; + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); + + return 0; +} + +/** + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure + * + * Configure maximum receivable packet size. + **/ +static void igb_rlpml_set(struct igb_adapter *adapter) +{ + u32 max_frame_size = adapter->max_frame_size; + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + + if (adapter->vmdq_pools && hw->mac.type != e1000_82575) { + int i; + for (i = 0; i < adapter->vmdq_pools; i++) + igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i); + /* + * If we're in VMDQ or SR-IOV mode, then set global RLPML + * to our max jumbo frame size, in case we need to enable + * jumbo frames on one of the rings later. + * This will not pass over-length frames into the default + * queue because it's gated by the VMOLR.RLPML. + */ + max_frame_size = MAX_JUMBO_FRAME_SIZE; + } + /* Set VF RLPML for the PF device. */ + if (adapter->vfs_allocated_count) + igb_set_vf_rlpml(adapter, max_frame_size, pf_id); + + E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size); +} + +static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, + int vfn, bool enable) +{ + struct e1000_hw *hw = &adapter->hw; + u32 val; + void __iomem *reg; + + if (hw->mac.type < e1000_82576) + return; + + if (hw->mac.type == e1000_i350) + reg = hw->hw_addr + E1000_DVMOLR(vfn); + else + reg = hw->hw_addr + E1000_VMOLR(vfn); + + val = readl(reg); + if (enable) + val |= E1000_VMOLR_STRVLAN; + else + val &= ~(E1000_VMOLR_STRVLAN); + writel(val, reg); +} +static inline void igb_set_vmolr(struct igb_adapter *adapter, + int vfn, bool aupe) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + /* + * This register exists only on 82576 and newer so if we are older then + * we should exit and do nothing + */ + if (hw->mac.type < e1000_82576) + return; + + vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); + + if (aupe) + vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ + else + vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ + + /* clear all bits that might not be set */ + vmolr &= ~E1000_VMOLR_RSSE; + + if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) + vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ + + vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ + vmolr |= E1000_VMOLR_LPE; /* Accept long packets */ + + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); +} + +/** + * igb_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured + * + * Configure the Rx unit of the MAC after a reset. + **/ +void igb_configure_rx_ring(struct igb_adapter *adapter, + struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + u64 rdba = ring->dma; + int reg_idx = ring->reg_idx; + u32 srrctl = 0, rxdctl = 0; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* + * RLPML prevents us from receiving a frame larger than max_frame so + * it is safe to just set the rx_buffer_len to max_frame without the + * risk of an skb over panic. + */ + ring->rx_buffer_len = max_t(u32, adapter->max_frame_size, + MAXIMUM_ETHERNET_VLAN_SIZE); + +#endif + /* disable the queue */ + E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0); + + /* Set DMA base address registers */ + E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx), + rdba & 0x00000000ffffffffULL); + E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32); + E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx), + ring->count * sizeof(union e1000_adv_rx_desc)); + + /* initialize head and tail */ + ring->tail = hw->hw_addr + E1000_RDT(reg_idx); + E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0); + writel(0, ring->tail); + + /* reset next-to- use/clean to place SW in sync with hardwdare */ + ring->next_to_clean = 0; + ring->next_to_use = 0; +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + ring->next_to_alloc = 0; + +#endif + /* set descriptor configuration */ +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + srrctl = ALIGN(ring->rx_buffer_len, 1024) >> + E1000_SRRCTL_BSIZEPKT_SHIFT; +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; +#ifdef HAVE_PTP_1588_CLOCK + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; +#endif /* HAVE_PTP_1588_CLOCK */ + /* + * We should set the drop enable bit if: + * SR-IOV is enabled + * or + * Flow Control is disabled and number of RX queues > 1 + * + * This allows us to avoid head of line blocking for security + * and performance reasons. + */ + if (adapter->vfs_allocated_count || + (adapter->num_rx_queues > 1 && + (hw->fc.requested_mode == e1000_fc_none || + hw->fc.requested_mode == e1000_fc_rx_pause))) + srrctl |= E1000_SRRCTL_DROP_EN; + + E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl); + + /* set filtering for VMDQ pools */ + igb_set_vmolr(adapter, reg_idx & 0x7, true); + + rxdctl |= IGB_RX_PTHRESH; + rxdctl |= IGB_RX_HTHRESH << 8; + rxdctl |= IGB_RX_WTHRESH << 16; + + /* enable receive descriptor fetching */ + rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl); +} + +/** + * igb_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + **/ +static void igb_configure_rx(struct igb_adapter *adapter) +{ + int i; + + /* set UTA to appropriate mode */ + igb_set_uta(adapter); + + igb_full_sync_mac_table(adapter); + /* Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring */ + for (i = 0; i < adapter->num_rx_queues; i++) + igb_configure_rx_ring(adapter, adapter->rx_ring[i]); +} + +/** + * igb_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + **/ +void igb_free_tx_resources(struct igb_ring *tx_ring) +{ + igb_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igb_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + **/ +static void igb_free_all_tx_resources(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_free_tx_resources(adapter->tx_ring[i]); +} + +void igb_unmap_and_free_tx_resource(struct igb_ring *ring, + struct igb_tx_buffer *tx_buffer) +{ + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* buffer_info must be completely set up in the transmit path */ +} + +/** + * igb_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned + **/ +static void igb_clean_tx_ring(struct igb_ring *tx_ring) +{ + struct igb_tx_buffer *buffer_info; + unsigned long size; + u16 i; + + if (!tx_ring->tx_buffer_info) + return; + /* Free all the Tx ring sk_buffs */ + + for (i = 0; i < tx_ring->count; i++) { + buffer_info = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, buffer_info); + } + + netdev_tx_reset_queue(txring_txq(tx_ring)); + + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + memset(tx_ring->tx_buffer_info, 0, size); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; +} + +/** + * igb_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure + **/ +static void igb_clean_all_tx_rings(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_clean_tx_ring(adapter->tx_ring[i]); +} + +/** + * igb_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + **/ +void igb_free_rx_resources(struct igb_ring *rx_ring) +{ + igb_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + dma_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * igb_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + **/ +static void igb_free_all_rx_resources(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igb_free_rx_resources(adapter->rx_ring[i]); +} + +/** + * igb_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from + **/ +void igb_clean_rx_ring(struct igb_ring *rx_ring) +{ + unsigned long size; + u16 i; + + if (!rx_ring->rx_buffer_info) + return; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + +#endif + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (buffer_info->dma) { + dma_unmap_single(rx_ring->dev, + buffer_info->dma, + rx_ring->rx_buffer_len, + DMA_FROM_DEVICE); + buffer_info->dma = 0; + } + + if (buffer_info->skb) { + dev_kfree_skb(buffer_info->skb); + buffer_info->skb = NULL; + } +#else + if (!buffer_info->page) + continue; + + dma_unmap_page(rx_ring->dev, + buffer_info->dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(buffer_info->page); + + buffer_info->page = NULL; +#endif + } + + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + memset(rx_ring->rx_buffer_info, 0, size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * igb_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure + **/ +static void igb_clean_all_rx_rings(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igb_clean_rx_ring(adapter->rx_ring[i]); +} + +/** + * igb_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int igb_set_mac(struct net_device *netdev, void *p) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + igb_del_mac_filter(adapter, hw->mac.addr, + adapter->vfs_allocated_count); + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + /* set the correct pool for the new PF MAC address in entry 0 */ + return igb_add_mac_filter(adapter, hw->mac.addr, + adapter->vfs_allocated_count); +} + +/** + * igb_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +int igb_write_mc_addr_list(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + u8 *mta_list; + int i, count; +#ifdef CONFIG_IGB_VMDQ_NETDEV + int vm; +#endif + count = netdev_mc_count(netdev); +#ifdef CONFIG_IGB_VMDQ_NETDEV + for (vm = 1; vm < adapter->vmdq_pools; vm++) { + if (!adapter->vmdq_netdev[vm]) + break; + if (!netif_running(adapter->vmdq_netdev[vm])) + continue; + count += netdev_mc_count(adapter->vmdq_netdev[vm]); + } +#endif + + if (!count) { + e1000_update_mc_addr_list(hw, NULL, 0); + return 0; + } + mta_list = kzalloc(count * 6, GFP_ATOMIC); + if (!mta_list) + return -ENOMEM; + + /* The shared function expects a packed array of only addresses. */ + i = 0; + netdev_for_each_mc_addr(ha, netdev) +#ifdef NETDEV_HW_ADDR_T_MULTICAST + memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); +#else + memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN); +#endif +#ifdef CONFIG_IGB_VMDQ_NETDEV + for (vm = 1; vm < adapter->vmdq_pools; vm++) { + if (!adapter->vmdq_netdev[vm]) + break; + if (!netif_running(adapter->vmdq_netdev[vm]) || + !netdev_mc_count(adapter->vmdq_netdev[vm])) + continue; + netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm]) +#ifdef NETDEV_HW_ADDR_T_MULTICAST + memcpy(mta_list + (i++ * ETH_ALEN), + ha->addr, ETH_ALEN); +#else + memcpy(mta_list + (i++ * ETH_ALEN), + ha->dmi_addr, ETH_ALEN); +#endif + } +#endif + e1000_update_mc_addr_list(hw, mta_list, i); + kfree(mta_list); + + return count; +} + +void igb_rar_set(struct igb_adapter *adapter, u32 index) +{ + u32 rar_low, rar_high; + struct e1000_hw *hw = &adapter->hw; + u8 *addr = adapter->mac_table[index].addr; + /* HW expects these in little endian so we reverse the byte order + * from network order (big endian) to little endian + */ + rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) + rar_high |= E1000_RAH_AV; + + if (hw->mac.type == e1000_82575) + rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; + else + rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; + + E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); + E1000_WRITE_FLUSH(hw); + E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); + E1000_WRITE_FLUSH(hw); +} + +void igb_full_sync_mac_table(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + igb_rar_set(adapter, i); + } +} + +void igb_sync_mac_table(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED) + igb_rar_set(adapter, i); + adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED); + } +} + +int igb_available_rars(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +#ifdef HAVE_SET_RX_MODE +/** + * igb_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +static int igb_write_uc_addr_list(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + unsigned int vfn = adapter->vfs_allocated_count; + int count = 0; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > igb_available_rars(adapter)) + return -ENOMEM; + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + igb_del_mac_filter(adapter, ha->addr, vfn); + igb_add_mac_filter(adapter, ha->addr, vfn); +#else + igb_del_mac_filter(adapter, ha->da_addr, vfn); + igb_add_mac_filter(adapter, ha->da_addr, vfn); +#endif + count++; + } + } + return count; +} + +#endif /* HAVE_SET_RX_MODE */ +/** + * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. + **/ +static void igb_set_rx_mode(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + unsigned int vfn = adapter->vfs_allocated_count; + u32 rctl, vmolr = 0; + int count; + + /* Check for Promiscuous and All Multicast modes */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + + /* clear the effected bits */ + rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); + + if (netdev->flags & IFF_PROMISC) { + rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); + vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + /* retain VLAN HW filtering if in VT mode */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) + rctl |= E1000_RCTL_VFE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + rctl |= E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = igb_write_mc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + } else if (count) { + vmolr |= E1000_VMOLR_ROMPE; + } + } +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = igb_write_uc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } +#endif /* HAVE_SET_RX_MODE */ + rctl |= E1000_RCTL_VFE; + } + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + /* + * In order to support SR-IOV and eventually VMDq it is necessary to set + * the VMOLR to enable the appropriate modes. Without this workaround + * we will have issues with VLAN tag stripping not being done for frames + * that are only arriving because we are the default pool + */ + if (hw->mac.type < e1000_82576) + return; + + vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) & + ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); + igb_restore_vf_multicasts(adapter); +} + +static void igb_check_wvbr(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 wvbr = 0; + + switch (hw->mac.type) { + case e1000_82576: + case e1000_i350: + if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR))) + return; + break; + default: + break; + } + + adapter->wvbr |= wvbr; +} + +#define IGB_STAGGERED_QUEUE_OFFSET 8 + +static void igb_spoof_check(struct igb_adapter *adapter) +{ + int j; + + if (!adapter->wvbr) + return; + + switch (adapter->hw.mac.type) { + case e1000_82576: + for (j = 0; j < adapter->vfs_allocated_count; j++) { + if (adapter->wvbr & (1 << j) || + adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { + DPRINTK(DRV, WARNING, + "Spoof event(s) detected on VF %d\n", j); + adapter->wvbr &= + ~((1 << j) | + (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); + } + } + break; + case e1000_i350: + for (j = 0; j < adapter->vfs_allocated_count; j++) { + if (adapter->wvbr & (1 << j)) { + DPRINTK(DRV, WARNING, + "Spoof event(s) detected on VF %d\n", j); + adapter->wvbr &= ~(1 << j); + } + } + break; + default: + break; + } +} + +/* Need to wait a few seconds after link up to get diagnostic information from + * the phy */ +static void igb_update_phy_info(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *) data; + e1000_get_phy_info(&adapter->hw); +} + +/** + * igb_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info + **/ +bool igb_has_link(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + bool link_active = FALSE; + + /* get_link_status is set on LSC (link status) interrupt or + * rx sequence error interrupt. get_link_status will stay + * false until the e1000_check_for_link establishes link + * for copper adapters ONLY + */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (!hw->mac.get_link_status) + return true; + case e1000_media_type_internal_serdes: + e1000_check_for_link(hw); + link_active = !hw->mac.get_link_status; + break; + case e1000_media_type_unknown: + default: + break; + } + + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) { + if (!netif_carrier_ok(adapter->netdev)) { + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { + adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; + adapter->link_check_timeout = jiffies; + } + } + + return link_active; +} + +/** + * igb_watchdog - Timer Call-back + * @data: pointer to adapter cast into an unsigned long + **/ +static void igb_watchdog(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + /* Do the rest outside of interrupt context */ + schedule_work(&adapter->watchdog_task); +} + +static void igb_watchdog_task(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, + struct igb_adapter, + watchdog_task); + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 link; + int i; + u32 thstat, ctrl_ext; + u32 connsw; + + link = igb_has_link(adapter); + /* Force link down if we have fiber to swap to */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (hw->phy.media_type == e1000_media_type_copper) { + connsw = E1000_READ_REG(hw, E1000_CONNSW); + if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) + link = 0; + } + } + + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { + if (time_after(jiffies, (adapter->link_check_timeout + HZ))) + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + else + link = FALSE; + } + + if (link) { + /* Perform a reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + igb_reset(adapter); + } + + /* Cancel scheduled suspend requests. */ + pm_runtime_resume(netdev->dev.parent); + + if (!netif_carrier_ok(netdev)) { + u32 ctrl; + e1000_get_speed_and_duplex(hw, + &adapter->link_speed, + &adapter->link_duplex); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + /* Links status message must follow this format */ + printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, " + "Flow Control: %s\n", + netdev->name, + adapter->link_speed, + adapter->link_duplex == FULL_DUPLEX ? + "Full Duplex" : "Half Duplex", + ((ctrl & E1000_CTRL_TFCE) && + (ctrl & E1000_CTRL_RFCE)) ? "RX/TX": + ((ctrl & E1000_CTRL_RFCE) ? "RX" : + ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None"))); + /* adjust timeout factor according to speed/duplex */ + adapter->tx_timeout_factor = 1; + switch (adapter->link_speed) { + case SPEED_10: + adapter->tx_timeout_factor = 14; + break; + case SPEED_100: + /* maybe add some timeout factor ? */ + break; + default: + break; + } + + netif_carrier_on(netdev); + netif_tx_wake_all_queues(netdev); + + igb_ping_all_vfs(adapter); +#ifdef IFLA_VF_MAX + igb_check_vf_rate_limit(adapter); +#endif /* IFLA_VF_MAX */ + + /* link state has changed, schedule phy info update */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->phy_info_timer, + round_jiffies(jiffies + 2 * HZ)); + } + } else { + if (netif_carrier_ok(netdev)) { + adapter->link_speed = 0; + adapter->link_duplex = 0; + /* check for thermal sensor event on i350 */ + if (hw->mac.type == e1000_i350) { + thstat = E1000_READ_REG(hw, E1000_THSTAT); + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + if ((hw->phy.media_type == + e1000_media_type_copper) && + !(ctrl_ext & + E1000_CTRL_EXT_LINK_MODE_SGMII)) { + if (thstat & E1000_THSTAT_PWR_DOWN) { + printk(KERN_ERR "igb: %s The " + "network adapter was stopped " + "because it overheated.\n", + netdev->name); + } + if (thstat & E1000_THSTAT_LINK_THROTTLE) { + printk(KERN_INFO + "igb: %s The network " + "adapter supported " + "link speed " + "was downshifted " + "because it " + "overheated.\n", + netdev->name); + } + } + } + + /* Links status message must follow this format */ + printk(KERN_INFO "igb: %s NIC Link is Down\n", + netdev->name); + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->phy_info_timer, + round_jiffies(jiffies + 2 * HZ)); + /* link is down, time to check for alternate media */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } + pm_schedule_suspend(netdev->dev.parent, + MSEC_PER_SEC * 5); + + /* also check for alternate media here */ + } else if (!netif_carrier_ok(netdev) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + hw->mac.ops.power_up_serdes(hw); + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } + } + + igb_update_stats(adapter); + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *tx_ring = adapter->tx_ring[i]; + if (!netif_carrier_ok(netdev)) { + /* We've lost link, so the controller stops DMA, + * but we've got queued Tx work that's never going + * to get done, so reset controller to flush Tx. + * (Do the reset outside of interrupt context). */ + if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); + /* return immediately since reset is imminent */ + return; + } + } + + /* Force detection of hung controller every watchdog period */ + set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + } + + /* Cause software interrupt to ensure rx ring is cleaned */ + if (adapter->msix_entries) { + u32 eics = 0; + for (i = 0; i < adapter->num_q_vectors; i++) + eics |= adapter->q_vector[i]->eims_value; + E1000_WRITE_REG(hw, E1000_EICS, eics); + } else { + E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0); + } + + igb_spoof_check(adapter); + + /* Reset the timer */ + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + HZ)); + else + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 2 * HZ)); + } +} + +static void igb_dma_err_task(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, + struct igb_adapter, + dma_err_task); + int vf; + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 hgptc; + u32 ciaa, ciad; + + hgptc = E1000_READ_REG(hw, E1000_HGPTC); + if (hgptc) /* If incrementing then no need for the check below */ + goto dma_timer_reset; + /* + * Check to see if a bad DMA write target from an errant or + * malicious VF has caused a PCIe error. If so then we can + * issue a VFLR to the offending VF(s) and then resume without + * requesting a full slot reset. + */ + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + ciaa = (vf << 16) | 0x80000000; + /* 32 bit read so align, we really want status at offset 6 */ + ciaa |= PCI_COMMAND; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + ciad = E1000_READ_REG(hw, E1000_CIAD); + ciaa &= 0x7FFFFFFF; + /* disable debug mode asap after reading data */ + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + /* Get the upper 16 bits which will be the PCI status reg */ + ciad >>= 16; + if (ciad & (PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR)) { + netdev_err(netdev, "VF %d suffered error\n", vf); + /* Issue VFLR */ + ciaa = (vf << 16) | 0x80000000; + ciaa |= 0xA8; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + ciad = 0x00008000; /* VFLR */ + E1000_WRITE_REG(hw, E1000_CIAD, ciad); + ciaa &= 0x7FFFFFFF; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + } + } +dma_timer_reset: + /* Reset the timer */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->dma_err_timer, + round_jiffies(jiffies + HZ / 10)); +} + +/** + * igb_dma_err_timer - Timer Call-back + * @data: pointer to adapter cast into an unsigned long + **/ +static void igb_dma_err_timer(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + /* Do the rest outside of interrupt context */ + schedule_work(&adapter->dma_err_task); +} + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +/** + * igb_update_ring_itr - update the dynamic ITR value based on packet size + * + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igb_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * This functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: This function is called only when operating in a multiqueue + * receive environment. + * @q_vector: pointer to q_vector + **/ +static void igb_update_ring_itr(struct igb_q_vector *q_vector) +{ + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + struct igb_adapter *adapter = q_vector->adapter; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. + */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGB_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if ((avg_wire_size > 300) && (avg_wire_size < 1200)) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGB_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGB_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +/** + * igb_update_itr - update the dynamic ITR value based on statistics + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: These calculations are only valid when operating in a single- + * queue environment. + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + **/ +static void igb_update_itr(struct igb_q_vector *q_vector, + struct igb_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes/packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes/packets > 8000) { + itrval = bulk_latency; + } else if ((packets < 10) || ((bytes/packets) > 1200)) { + itrval = bulk_latency; + } else if ((packets > 35)) { + itrval = lowest_latency; + } + } else if (bytes/packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +static void igb_set_itr(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGB_4K_ITR; + goto set_itr_now; + default: + break; + } + + igb_update_itr(q_vector, &q_vector->tx); + igb_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : + new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. + */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) +{ + struct e1000_adv_tx_context_desc *context_desc; + u16 i = tx_ring->next_to_use; + + context_desc = IGB_TX_CTXTDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + /* For 82575, context index must be unique per ring. */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + mss_l4len_idx |= tx_ring->reg_idx << 4; + + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +} + +static int igb_tso(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + u8 *hdr_len) +{ +#ifdef NETIF_F_TSO + struct sk_buff *skb = first->skb; + u32 vlan_macip_lens, type_tucmd; + u32 mss_l4len_idx, l4len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) +#endif /* NETIF_F_TSO */ + return 0; +#ifdef NETIF_F_TSO + + if (skb_header_cloned(skb)) { + int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) + return err; + } + + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + + if (first->protocol == __constant_htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + iph->tot_len = 0; + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + first->tx_flags |= IGB_TX_FLAGS_TSO | + IGB_TX_FLAGS_CSUM | + IGB_TX_FLAGS_IPV4; +#ifdef NETIF_F_TSO6 + } else if (skb_is_gso_v6(skb)) { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + first->tx_flags |= IGB_TX_FLAGS_TSO | + IGB_TX_FLAGS_CSUM; +#endif + } + + /* compute header lengths */ + l4len = tcp_hdrlen(skb); + *hdr_len = skb_transport_offset(skb) + l4len; + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* MSS L4LEN IDX */ + mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; + + /* VLAN MACLEN IPLEN */ + vlan_macip_lens = skb_network_header_len(skb); + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; + + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + + return 1; +#endif /* NETIF_F_TSO */ +} + +static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) +{ + struct sk_buff *skb = first->skb; + u32 vlan_macip_lens = 0; + u32 mss_l4len_idx = 0; + u32 type_tucmd = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) + return; + } else { + u8 nexthdr = 0; + switch (first->protocol) { + case __constant_htons(ETH_P_IP): + vlan_macip_lens |= skb_network_header_len(skb); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + nexthdr = ip_hdr(skb)->protocol; + break; +#ifdef NETIF_F_IPV6_CSUM + case __constant_htons(ETH_P_IPV6): + vlan_macip_lens |= skb_network_header_len(skb); + nexthdr = ipv6_hdr(skb)->nexthdr; + break; +#endif + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but proto=%x!\n", + first->protocol); + } + break; + } + + switch (nexthdr) { + case IPPROTO_TCP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; + mss_l4len_idx = tcp_hdrlen(skb) << + E1000_ADVTXD_L4LEN_SHIFT; + break; +#ifdef HAVE_SCTP + case IPPROTO_SCTP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; + mss_l4len_idx = sizeof(struct sctphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; +#endif + case IPPROTO_UDP: + mss_l4len_idx = sizeof(struct udphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but l4 proto=%x!\n", + nexthdr); + } + break; + } + + /* update TX checksum flag */ + first->tx_flags |= IGB_TX_FLAGS_CSUM; + } + + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; + + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); +} + +#define IGB_SET_FLAG(_input, _flag, _result) \ + ((_flag <= _result) ? \ + ((u32)(_input & _flag) * (_result / _flag)) : \ + ((u32)(_input & _flag) / (_flag / _result))) + +static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS; + + /* set HW vlan bit if vlan is present */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, + (E1000_ADVTXD_DCMD_VLE)); + + /* set segmentation bits for TSO */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO, + (E1000_ADVTXD_DCMD_TSE)); + + /* set timestamp bit if present */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP, + (E1000_ADVTXD_MAC_TSTAMP)); + + return cmd_type; +} + +static void igb_tx_olinfo_status(struct igb_ring *tx_ring, + union e1000_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; + + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + olinfo_status |= tx_ring->reg_idx << 4; + + /* insert L4 checksum */ + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_CSUM, + (E1000_TXD_POPTS_TXSM << 8)); + + /* insert IPv4 checksum */ + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_IPV4, + (E1000_TXD_POPTS_IXSM << 8)); + + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + +static void igb_tx_map(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + const u8 hdr_len) +{ + struct sk_buff *skb = first->skb; + struct igb_tx_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; + struct skb_frag_struct *frag; + dma_addr_t dma; + unsigned int data_len, size; + u32 tx_flags = first->tx_flags; + u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); + u16 i = tx_ring->next_to_use; + + tx_desc = IGB_TX_DESC(tx_ring, i); + + igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buffer = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + dma += IGB_MAX_DATA_PER_TXD; + size -= IGB_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, + size, DMA_TO_DEVICE); + + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + /* write last descriptor with RS and EOP bits */ + cmd_type |= size | IGB_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + /* set the timestamp */ + first->time_stamp = jiffies; + + /* + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it syncronizes IO on IA64/Altix systems */ + mmiowb(); + + return; + +dma_error: + dev_err(tx_ring->dev, "TX DMA map failed\n"); + + /* clear dma mappings for failed tx_buffer_info map */ + for (;;) { + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; +} + +static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + struct net_device *netdev = netdev_ring(tx_ring); + + if (netif_is_multiqueue(netdev)) + netif_stop_subqueue(netdev, ring_queue_index(tx_ring)); + else + netif_stop_queue(netdev); + + /* Herbert's original patch had: + * smp_mb__after_netif_stop_queue(); + * but since that doesn't exist yet, just open code it. */ + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. */ + if (igb_desc_unused(tx_ring) < size) + return -EBUSY; + + /* A reprieve! */ + if (netif_is_multiqueue(netdev)) + netif_wake_subqueue(netdev, ring_queue_index(tx_ring)); + else + netif_wake_queue(netdev); + + tx_ring->tx_stats.restart_queue++; + + return 0; +} + +static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + if (igb_desc_unused(tx_ring) >= size) + return 0; + return __igb_maybe_stop_tx(tx_ring, size); +} + +netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, + struct igb_ring *tx_ring) +{ + struct igb_tx_buffer *first; + int tso; + u32 tx_flags = 0; +#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD + unsigned short f; +#endif + u16 count = TXD_USE_COUNT(skb_headlen(skb)); + __be16 protocol = vlan_get_protocol(skb); + u8 hdr_len = 0; + + /* + * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, + * + 2 desc gap to keep tail from touching head, + * + 1 desc for context descriptor, + * otherwise try next time + */ +#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); +#else + count += skb_shinfo(skb)->nr_frags; +#endif + if (igb_maybe_stop_tx(tx_ring, count + 3)) { + /* this is a hard error */ + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + + skb_tx_timestamp(skb); + +#ifdef HAVE_PTP_1588_CLOCK + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); + if (!adapter->ptp_tx_skb) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + if (vlan_tx_tag_present(skb)) { + tx_flags |= IGB_TX_FLAGS_VLAN; + tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); + } + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = protocol; + + tso = igb_tso(tx_ring, first, &hdr_len); + if (tso < 0) + goto out_drop; + else if (!tso) + igb_tx_csum(tx_ring, first); + + igb_tx_map(tx_ring, first, hdr_len); + +#ifndef HAVE_TRANS_START_IN_QUEUE + netdev_ring(tx_ring)->trans_start = jiffies; + +#endif + /* Make sure there is space in the ring for the next send. */ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + + return NETDEV_TX_OK; + +out_drop: + igb_unmap_and_free_tx_resource(tx_ring, first); + + return NETDEV_TX_OK; +} + +#ifdef HAVE_TX_MQ +static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, + struct sk_buff *skb) +{ + unsigned int r_idx = skb->queue_mapping; + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} +#else +#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0] +#endif + +static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* + * The minimum packet size with TCTL.PSP set is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); +} + +/** + * igb_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + **/ +static void igb_tx_timeout(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + /* Do the reset outside of interrupt context */ + adapter->tx_timeout_count++; + + if (hw->mac.type >= e1000_82580) + hw->dev_spec._82575.global_device_reset = true; + + schedule_work(&adapter->reset_task); + E1000_WRITE_REG(hw, E1000_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); +} + +static void igb_reset_task(struct work_struct *work) +{ + struct igb_adapter *adapter; + adapter = container_of(work, struct igb_adapter, reset_task); + + igb_reinit_locked(adapter); +} + +/** + * igb_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are updated here and also from the timer callback. + **/ +static struct net_device_stats *igb_get_stats(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (!test_bit(__IGB_RESETTING, &adapter->state)) + igb_update_stats(adapter); + +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + /* only return the current stats */ + return &netdev->stats; +#else + /* only return the current stats */ + return &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ +} + +/** + * igb_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + **/ +static int igb_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + + if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { + dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n"); + return -EINVAL; + } + +#define MAX_STD_JUMBO_FRAME_SIZE 9238 + if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { + dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n"); + return -EINVAL; + } + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + /* igb_down has a dependency on max_frame_size */ + adapter->max_frame_size = max_frame; + + if (netif_running(netdev)) + igb_down(adapter); + + dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + hw->dev_spec._82575.mtu = new_mtu; + + if (netif_running(netdev)) + igb_up(adapter); + else + igb_reset(adapter); + + clear_bit(__IGB_RESETTING, &adapter->state); + + return 0; +} + +/** + * igb_update_stats - Update the board statistics counters + * @adapter: board private structure + **/ + +void igb_update_stats(struct igb_adapter *adapter) +{ +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &adapter->netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ + struct e1000_hw *hw = &adapter->hw; +#ifdef HAVE_PCI_ERS + struct pci_dev *pdev = adapter->pdev; +#endif + u32 reg, mpc; + u16 phy_tmp; + int i; + u64 bytes, packets; +#ifndef IGB_NO_LRO + u32 flushed = 0, coal = 0; + struct igb_q_vector *q_vector; +#endif + +#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF + + /* + * Prevent stats update while adapter is being reset, or if the pci + * connection is down. + */ + if (adapter->link_speed == 0) + return; +#ifdef HAVE_PCI_ERS + if (pci_channel_offline(pdev)) + return; + +#endif +#ifndef IGB_NO_LRO + for (i = 0; i < adapter->num_q_vectors; i++) { + q_vector = adapter->q_vector[i]; + if (!q_vector) + continue; + flushed += q_vector->lrolist.stats.flushed; + coal += q_vector->lrolist.stats.coal; + } + adapter->lro_stats.flushed = flushed; + adapter->lro_stats.coal = coal; + +#endif + bytes = 0; + packets = 0; + for (i = 0; i < adapter->num_rx_queues; i++) { + u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF; + struct igb_ring *ring = adapter->rx_ring[i]; + ring->rx_stats.drops += rqdpc_tmp; + net_stats->rx_fifo_errors += rqdpc_tmp; +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (!ring->vmdq_netdev) { + bytes += ring->rx_stats.bytes; + packets += ring->rx_stats.packets; + } +#else + bytes += ring->rx_stats.bytes; + packets += ring->rx_stats.packets; +#endif + } + + net_stats->rx_bytes = bytes; + net_stats->rx_packets = packets; + + bytes = 0; + packets = 0; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *ring = adapter->tx_ring[i]; +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (!ring->vmdq_netdev) { + bytes += ring->tx_stats.bytes; + packets += ring->tx_stats.packets; + } +#else + bytes += ring->tx_stats.bytes; + packets += ring->tx_stats.packets; +#endif + } + net_stats->tx_bytes = bytes; + net_stats->tx_packets = packets; + + /* read stats registers */ + adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); + adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC); + adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL); + E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */ + adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC); + adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC); + adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC); + + adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64); + adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127); + adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255); + adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511); + adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023); + adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522); + adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS); + adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC); + + mpc = E1000_READ_REG(hw, E1000_MPC); + adapter->stats.mpc += mpc; + net_stats->rx_fifo_errors += mpc; + adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC); + adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL); + adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC); + adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL); + adapter->stats.dc += E1000_READ_REG(hw, E1000_DC); + adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC); + adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC); + adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC); + adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC); + adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); + adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC); + adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC); + adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL); + E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */ + adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC); + adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC); + adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC); + adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC); + adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH); + adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH); + adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR); + + adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64); + adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127); + adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255); + adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511); + adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); + adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); + + adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC); + adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC); + + adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT); + adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC); + + adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); + /* read internal phy sepecific stats */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { + adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS); + } + adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC); + adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); + + adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC); + adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); + adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); + adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); + adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); + adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); + adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); + adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); + adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); + + /* Fill out the OS statistics structure */ + net_stats->multicast = adapter->stats.mprc; + net_stats->collisions = adapter->stats.colc; + + /* Rx Errors */ + + /* RLEC on some newer hardware can be incorrect so build + * our own version based on RUC and ROC */ + net_stats->rx_errors = adapter->stats.rxerrc + + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.cexterr; + net_stats->rx_length_errors = adapter->stats.ruc + + adapter->stats.roc; + net_stats->rx_crc_errors = adapter->stats.crcerrs; + net_stats->rx_frame_errors = adapter->stats.algnerrc; + net_stats->rx_missed_errors = adapter->stats.mpc; + + /* Tx Errors */ + net_stats->tx_errors = adapter->stats.ecol + + adapter->stats.latecol; + net_stats->tx_aborted_errors = adapter->stats.ecol; + net_stats->tx_window_errors = adapter->stats.latecol; + net_stats->tx_carrier_errors = adapter->stats.tncrs; + + /* Tx Dropped needs to be maintained elsewhere */ + + /* Phy Stats */ + if (hw->phy.media_type == e1000_media_type_copper) { + if ((adapter->link_speed == SPEED_1000) && + (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { + phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; + adapter->phy_stats.idle_errors += phy_tmp; + } + } + + /* Management Stats */ + adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC); + adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC); + if (hw->mac.type > e1000_82580) { + adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC); + adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC); + adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC); + adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC); + } +} + +static irqreturn_t igb_msix_other(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct e1000_hw *hw = &adapter->hw; + u32 icr = E1000_READ_REG(hw, E1000_ICR); + /* reading ICR causes bit 31 of EICR to be cleared */ + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + /* The DMA Out of Sync is also indication of a spoof event + * in IOV mode. Check the Wrong VM Behavior register to + * see if it is really a spoof event. */ + igb_check_wvbr(adapter); + } + + /* Check for a mailbox event */ + if (icr & E1000_ICR_VMMB) + igb_msg_task(adapter); + + if (icr & E1000_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + /* Check for MDD event */ + if (icr & E1000_ICR_MDDET) + igb_process_mdd_event(adapter); + + E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other); + + return IRQ_HANDLED; +} + +static void igb_write_itr(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + u32 itr_val = q_vector->itr_val & 0x7FFC; + + if (!q_vector->set_itr) + return; + + if (!itr_val) + itr_val = 0x4; + + if (adapter->hw.mac.type == e1000_82575) + itr_val |= itr_val << 16; + else + itr_val |= E1000_EITR_CNT_IGNR; + + writel(itr_val, q_vector->itr_register); + q_vector->set_itr = 0; +} + +static irqreturn_t igb_msix_ring(int irq, void *data) +{ + struct igb_q_vector *q_vector = data; + + /* Write the ITR value calculated from the previous interrupt. */ + igb_write_itr(q_vector); + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +#ifdef IGB_DCA +static void igb_update_tx_dca(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); + + if (hw->mac.type != e1000_82575) + txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | + E1000_DCA_TXCTRL_DATA_RRO_EN | + E1000_DCA_TXCTRL_DESC_DCA_EN; + + E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); +} + +static void igb_update_rx_dca(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); + + if (hw->mac.type != e1000_82575) + rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | + E1000_DCA_RXCTRL_DESC_DCA_EN; + + E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); +} + +static void igb_update_dca(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + int cpu = get_cpu(); + + if (q_vector->cpu == cpu) + goto out_no_update; + + if (q_vector->tx.ring) + igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); + + if (q_vector->rx.ring) + igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); + + q_vector->cpu = cpu; +out_no_update: + put_cpu(); +} + +static void igb_setup_dca(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) + return; + + /* Always use CB2 mode, difference is masked in the CB driver. */ + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); + + for (i = 0; i < adapter->num_q_vectors; i++) { + adapter->q_vector[i]->cpu = -1; + igb_update_dca(adapter->q_vector[i]); + } +} + +static int __igb_notify_dca(struct device *dev, void *data) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + unsigned long event = *(unsigned long *)data; + + switch (event) { + case DCA_PROVIDER_ADD: + /* if already enabled, don't do it again */ + if (adapter->flags & IGB_FLAG_DCA_ENABLED) + break; + if (dca_add_requester(dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + break; + } + /* Fall Through since DCA is disabled. */ + case DCA_PROVIDER_REMOVE: + if (adapter->flags & IGB_FLAG_DCA_ENABLED) { + /* without this a class_device is left + * hanging around in the sysfs model */ + dca_remove_requester(dev); + dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); + adapter->flags &= ~IGB_FLAG_DCA_ENABLED; + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); + } + break; + } + + return E1000_SUCCESS; +} + +static int igb_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int ret_val; + + ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, + __igb_notify_dca); + + return ret_val ? NOTIFY_BAD : NOTIFY_DONE; +} +#endif /* IGB_DCA */ + +static int igb_vf_configure(struct igb_adapter *adapter, int vf) +{ + unsigned char mac_addr[ETH_ALEN]; + + random_ether_addr(mac_addr); + igb_set_vf_mac(adapter, vf, mac_addr); + +#ifdef IFLA_VF_MAX +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + /* By default spoof check is enabled for all VFs */ + adapter->vf_data[vf].spoofchk_enabled = true; +#endif +#endif + + return true; +} + +static void igb_ping_all_vfs(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ping; + int i; + + for (i = 0 ; i < adapter->vfs_allocated_count; i++) { + ping = E1000_PF_CONTROL_MSG; + if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) + ping |= E1000_VT_MSGTYPE_CTS; + e1000_write_mbx(hw, &ping, 1, i); + } +} + +/** + * igb_mta_set_ - Set multicast filter table address + * @adapter: pointer to the adapter structure + * @hash_value: determines the MTA register and bit to set + * + * The multicast table address is a register array of 32-bit registers. + * The hash_value is used to determine what register the bit is in, the + * current value is read, the new bit is OR'd in and the new value is + * written back into the register. + **/ +void igb_mta_set(struct igb_adapter *adapter, u32 hash_value) +{ + struct e1000_hw *hw = &adapter->hw; + u32 hash_bit, hash_reg, mta; + + /* + * The MTA is a register array of 32-bit registers. It is + * treated like an array of (32*mta_reg_count) bits. We want to + * set bit BitArray[hash_value]. So we figure out what register + * the bit is in, read it, OR in the new bit, then write + * back the new value. The (hw->mac.mta_reg_count - 1) serves as a + * mask to bits 31:5 of the hash value which gives us the + * register we're modifying. The hash bit within that register + * is determined by the lower 5 bits of the hash value. + */ + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); + + mta |= (1 << hash_bit); + + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); + E1000_WRITE_FLUSH(hw); +} + +static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + + struct e1000_hw *hw = &adapter->hw; + u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf)); + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + + vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | + IGB_VF_FLAG_MULTI_PROMISC); + vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); + +#ifdef IGB_ENABLE_VF_PROMISC + if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) { + vmolr |= E1000_VMOLR_ROPE; + vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC; + *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST; + } +#endif + if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { + vmolr |= E1000_VMOLR_MPME; + vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; + *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; + } else { + /* + * if we have hashes and we are clearing a multicast promisc + * flag we need to write the hashes to the MTA as this step + * was previously skipped + */ + if (vf_data->num_vf_mc_hashes > 30) { + vmolr |= E1000_VMOLR_MPME; + } else if (vf_data->num_vf_mc_hashes) { + int j; + vmolr |= E1000_VMOLR_ROMPE; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); + } + } + + E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr); + + /* there are flags left unprocessed, likely not supported */ + if (*msgbuf & E1000_VT_MSGINFO_MASK) + return -EINVAL; + + return 0; + +} + +static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; + + /* salt away the number of multicast addresses assigned + * to this VF for later use to restore when the PF multi cast + * list changes + */ + vf_data->num_vf_mc_hashes = n; + + /* only up to 30 hash values supported */ + if (n > 30) + n = 30; + + /* store the hashes for later use */ + for (i = 0; i < n; i++) + vf_data->vf_mc_hashes[i] = hash_list[i]; + + /* Flush and reset the mta with the new values */ + igb_set_rx_mode(adapter->netdev); + + return 0; +} + +static void igb_restore_vf_multicasts(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data; + int i, j; + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i)); + vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); + + vf_data = &adapter->vf_data[i]; + + if ((vf_data->num_vf_mc_hashes > 30) || + (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { + vmolr |= E1000_VMOLR_MPME; + } else if (vf_data->num_vf_mc_hashes) { + vmolr |= E1000_VMOLR_ROMPE; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); + } + E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr); + } +} + +static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pool_mask, reg, vid; + u16 vlan_default; + int i; + + pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + + /* remove the vf from the pool */ + reg &= ~pool_mask; + + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK) && + (reg & E1000_VLVF_VLANID_ENABLE)) { + reg = 0; + vid = reg & E1000_VLVF_VLANID_MASK; + igb_vfta_set(adapter, vid, FALSE); + } + + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + } + + adapter->vf_data[vf].vlans_enabled = 0; + + vlan_default = adapter->vf_data[vf].default_vf_vlan_id; + if (vlan_default) + igb_vlvf_set(adapter, vlan_default, true, vf); +} + +s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg, i; + + /* The vlvf table only exists on 82576 hardware and newer */ + if (hw->mac.type < e1000_82576) + return -1; + + /* we only need to do this if VMDq is enabled */ + if (!adapter->vmdq_pools) + return -1; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (add) { + if (i == E1000_VLVF_ARRAY_SIZE) { + /* Did not find a matching VLAN ID entry that was + * enabled. Search for a free filter entry, i.e. + * one without the enable bit set + */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if (!(reg & E1000_VLVF_VLANID_ENABLE)) + break; + } + } + if (i < E1000_VLVF_ARRAY_SIZE) { + /* Found an enabled/available entry */ + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* if !enabled we need to set this up in vfta */ + if (!(reg & E1000_VLVF_VLANID_ENABLE)) { + /* add VID to filter table */ + igb_vfta_set(adapter, vid, TRUE); + reg |= E1000_VLVF_VLANID_ENABLE; + } + reg &= ~E1000_VLVF_VLANID_MASK; + reg |= vid; + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + + /* do not modify RLPML for PF devices */ + if (vf >= adapter->vfs_allocated_count) + return E1000_SUCCESS; + + if (!adapter->vf_data[vf].vlans_enabled) { + u32 size; + reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); + size = reg & E1000_VMOLR_RLPML_MASK; + size += 4; + reg &= ~E1000_VMOLR_RLPML_MASK; + reg |= size; + E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); + } + + adapter->vf_data[vf].vlans_enabled++; + } + } else { + if (i < E1000_VLVF_ARRAY_SIZE) { + /* remove vf from the pool */ + reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK)) { + reg = 0; + igb_vfta_set(adapter, vid, FALSE); + } + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + + /* do not modify RLPML for PF devices */ + if (vf >= adapter->vfs_allocated_count) + return E1000_SUCCESS; + + adapter->vf_data[vf].vlans_enabled--; + if (!adapter->vf_data[vf].vlans_enabled) { + u32 size; + reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); + size = reg & E1000_VMOLR_RLPML_MASK; + size -= 4; + reg &= ~E1000_VMOLR_RLPML_MASK; + reg |= size; + E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); + } + } + } + return E1000_SUCCESS; +} + +#ifdef IFLA_VF_MAX +static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + if (vid) + E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); + else + E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0); +} + +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos) +{ + int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); + + /* VLAN IDs accepted range 0-4094 */ + if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7)) + return -EINVAL; + if (vlan || qos) { + err = igb_vlvf_set(adapter, vlan, !!vlan, vf); + if (err) + goto out; + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + igb_set_vf_vlan_strip(adapter, vf, true); + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set," + " but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before" + " attempting to use the VF device.\n"); + } + } else { + if (adapter->vf_data[vf].pf_vlan) + dev_info(&adapter->pdev->dev, + "Clearing VLAN on VF %d\n", vf); + igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + igb_set_vmvir(adapter, vlan, vf); + igb_set_vmolr(adapter, vf, true); + igb_set_vf_vlan_strip(adapter, vf, false); + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; + } +out: + return err; +} + +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 dtxswc, reg_offset; + + if (!adapter->vfs_allocated_count) + return -EOPNOTSUPP; + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC; + dtxswc = E1000_READ_REG(hw, reg_offset); + if (setting) + dtxswc |= ((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + else + dtxswc &= ~((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + E1000_WRITE_REG(hw, reg_offset, dtxswc); + + adapter->vf_data[vf].spoofchk_enabled = setting; + return E1000_SUCCESS; +} +#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ +#endif /* IFLA_VF_MAX */ + +static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + u32 reg; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (i >= E1000_VLVF_ARRAY_SIZE) + i = -1; + + return i; +} + +static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int err = 0; + + if (vid) + igb_set_vf_vlan_strip(adapter, vf, true); + else + igb_set_vf_vlan_strip(adapter, vf, false); + + /* If in promiscuous mode we need to make sure the PF also has + * the VLAN filter set. + */ + if (add && (adapter->netdev->flags & IFF_PROMISC)) + err = igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + if (err) + goto out; + + err = igb_vlvf_set(adapter, vid, add, vf); + + if (err) + goto out; + + /* Go through all the checks to see if the VLAN filter should + * be wiped completely. + */ + if (!add && (adapter->netdev->flags & IFF_PROMISC)) { + u32 vlvf, bits; + + int regndx = igb_find_vlvf_entry(adapter, vid); + if (regndx < 0) + goto out; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx)); + bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); + /* If the filter was removed then ensure PF pool bit + * is cleared if the PF only added itself to the pool + * because the PF is in promiscuous mode. + */ + if ((vlvf & VLAN_VID_MASK) == vid && +#ifndef HAVE_VLAN_RX_REGISTER + !test_bit(vid, adapter->active_vlans) && +#endif + !bits) + igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + } + +out: + return err; +} + +static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + /* clear flags except flag that the PF has set the MAC */ + adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; + adapter->vf_data[vf].last_nack = jiffies; + + /* reset offloads to defaults */ + igb_set_vmolr(adapter, vf, true); + + /* reset vlans for device */ + igb_clear_vf_vfta(adapter, vf); +#ifdef IFLA_VF_MAX + if (adapter->vf_data[vf].pf_vlan) + igb_ndo_set_vf_vlan(adapter->netdev, vf, + adapter->vf_data[vf].pf_vlan, + adapter->vf_data[vf].pf_qos); + else + igb_clear_vf_vfta(adapter, vf); +#endif + + /* reset multicast table array for vf */ + adapter->vf_data[vf].num_vf_mc_hashes = 0; + + /* Flush and reset the mta with the new values */ + igb_set_rx_mode(adapter->netdev); + + /* + * Reset the VFs TDWBAL and TDWBAH registers which are not + * cleared by a VFLR + */ + E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0); + E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0); + if (hw->mac.type == e1000_82576) { + E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0); + E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0); + } +} + +static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) +{ + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + + /* generate a new mac address as we were hotplug removed/added */ + if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) + random_ether_addr(vf_mac); + + /* process remaining reset events */ + igb_vf_reset(adapter, vf); +} + +static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + u32 reg, msgbuf[3]; + u8 *addr = (u8 *)(&msgbuf[1]); + + /* process all the same items cleared in a function level reset */ + igb_vf_reset(adapter, vf); + + /* set vf mac address */ + igb_del_mac_filter(adapter, vf_mac, vf); + igb_add_mac_filter(adapter, vf_mac, vf); + + /* enable transmit and receive for vf */ + reg = E1000_READ_REG(hw, E1000_VFTE); + E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf)); + reg = E1000_READ_REG(hw, E1000_VFRE); + E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf)); + + adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; + + /* reply to reset with ack and vf mac address */ + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, 6); + e1000_write_mbx(hw, msgbuf, 3, vf); +} + +static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) +{ + /* + * The VF MAC Address is stored in a packed array of bytes + * starting at the second 32 bit word of the msg array + */ + unsigned char *addr = (unsigned char *)&msg[1]; + int err = -1; + + if (is_valid_ether_addr(addr)) + err = igb_set_vf_mac(adapter, vf, addr); + + return err; +} + +static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + u32 msg = E1000_VT_MSGTYPE_NACK; + + /* if device isn't clear to send it shouldn't be reading either */ + if (!(vf_data->flags & IGB_VF_FLAG_CTS) && + time_after(jiffies, vf_data->last_nack + (2 * HZ))) { + e1000_write_mbx(hw, &msg, 1, vf); + vf_data->last_nack = jiffies; + } +} + +static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct pci_dev *pdev = adapter->pdev; + u32 msgbuf[E1000_VFMAILBOX_SIZE]; + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + s32 retval; + + retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); + + if (retval) { + dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n"); + return; + } + + /* this is a message we already processed, do nothing */ + if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) + return; + + /* + * until the vf completes a reset it should not be + * allowed to start any configuration. + */ + + if (msgbuf[0] == E1000_VF_RESET) { + igb_vf_reset_msg(adapter, vf); + return; + } + + if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { + msgbuf[0] = E1000_VT_MSGTYPE_NACK; + if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) { + e1000_write_mbx(hw, msgbuf, 1, vf); + vf_data->last_nack = jiffies; + } + return; + } + + switch ((msgbuf[0] & 0xFFFF)) { + case E1000_VF_SET_MAC_ADDR: + retval = -EINVAL; +#ifndef IGB_DISABLE_VF_MAC_SET + if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) + retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); + else + DPRINTK(DRV, INFO, + "VF %d attempted to override administratively " + "set MAC address\nReload the VF driver to " + "resume operations\n", vf); +#endif + break; + case E1000_VF_SET_PROMISC: + retval = igb_set_vf_promisc(adapter, msgbuf, vf); + break; + case E1000_VF_SET_MULTICAST: + retval = igb_set_vf_multicasts(adapter, msgbuf, vf); + break; + case E1000_VF_SET_LPE: + retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); + break; + case E1000_VF_SET_VLAN: + retval = -1; +#ifdef IFLA_VF_MAX + if (vf_data->pf_vlan) + DPRINTK(DRV, INFO, + "VF %d attempted to override administratively " + "set VLAN tag\nReload the VF driver to " + "resume operations\n", vf); + else +#endif + retval = igb_set_vf_vlan(adapter, msgbuf, vf); + break; + default: + dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]); + retval = -E1000_ERR_MBX; + break; + } + + /* notify the VF of the results of what it sent us */ + if (retval) + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + else + msgbuf[0] |= E1000_VT_MSGTYPE_ACK; + + msgbuf[0] |= E1000_VT_MSGTYPE_CTS; + + e1000_write_mbx(hw, msgbuf, 1, vf); +} + +static void igb_msg_task(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vf; + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + /* process any reset requests */ + if (!e1000_check_for_rst(hw, vf)) + igb_vf_reset_event(adapter, vf); + + /* process any messages pending */ + if (!e1000_check_for_msg(hw, vf)) + igb_rcv_msg_from_vf(adapter, vf); + + /* process any acks */ + if (!e1000_check_for_ack(hw, vf)) + igb_rcv_ack_from_vf(adapter, vf); + } +} + +/** + * igb_set_uta - Set unicast filter table address + * @adapter: board private structure + * + * The unicast table address is a register array of 32-bit registers. + * The table is meant to be used in a way similar to how the MTA is used + * however due to certain limitations in the hardware it is necessary to + * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous + * enable bit to allow vlan tag stripping when promiscuous mode is enabled + **/ +static void igb_set_uta(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + /* The UTA table only exists on 82576 hardware and newer */ + if (hw->mac.type < e1000_82576) + return; + + /* we only need to do this if VMDq is enabled */ + if (!adapter->vmdq_pools) + return; + + for (i = 0; i < hw->mac.uta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0); +} + +/** + * igb_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ +static irqreturn_t igb_intr_msi(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct igb_q_vector *q_vector = adapter->q_vector[0]; + struct e1000_hw *hw = &adapter->hw; + /* read ICR disables interrupts using IAM */ + u32 icr = E1000_READ_REG(hw, E1000_ICR); + + igb_write_itr(q_vector); + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + hw->mac.get_link_status = 1; + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igb_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ +static irqreturn_t igb_intr(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct igb_q_vector *q_vector = adapter->q_vector[0]; + struct e1000_hw *hw = &adapter->hw; + /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No + * need for the IMC write */ + u32 icr = E1000_READ_REG(hw, E1000_ICR); + + /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is + * not set, then the adapter didn't send an interrupt */ + if (!(icr & E1000_ICR_INT_ASSERTED)) + return IRQ_NONE; + + igb_write_itr(q_vector); + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +void igb_ring_irq_enable(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if ((adapter->num_q_vectors == 1) && !adapter->vf_data) + igb_set_itr(q_vector); + else + igb_update_ring_itr(q_vector); + } + + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->msix_entries) + E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value); + else + igb_irq_enable(adapter); + } +} + +/** + * igb_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + **/ +static int igb_poll(struct napi_struct *napi, int budget) +{ + struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi); + bool clean_complete = true; + +#ifdef IGB_DCA + if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_dca(q_vector); +#endif + if (q_vector->tx.ring) + clean_complete = igb_clean_tx_irq(q_vector); + + if (q_vector->rx.ring) + clean_complete &= igb_clean_rx_irq(q_vector, budget); + +#ifndef HAVE_NETDEV_NAPI_LIST + /* if netdev is disabled we need to stop polling */ + if (!netif_running(q_vector->adapter->netdev)) + clean_complete = true; + +#endif + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* If not enough Rx work done, exit the polling mode */ + napi_complete(napi); + igb_ring_irq_enable(q_vector); + + return 0; +} + +/** + * igb_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info + * returns TRUE if ring is completely cleaned + **/ +static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct igb_ring *tx_ring = q_vector->tx.ring; + struct igb_tx_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = q_vector->tx.work_limit; + unsigned int i = tx_ring->next_to_clean; + + if (test_bit(__IGB_DOWN, &adapter->state)) + return true; + + tx_buffer = &tx_ring->tx_buffer_info[i]; + tx_desc = IGB_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + + /* clear last DMA location and unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + netdev_tx_completed_queue(txring_txq(tx_ring), + total_packets, total_bytes); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; + +#ifdef DEBUG + if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) && + !(adapter->disable_hw_reset && adapter->tx_hang_detected)) { +#else + if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { +#endif + struct e1000_hw *hw = &adapter->hw; + + /* Detect a transmit hang in hardware, this serializes the + * check with the clearing of time_stamp and movement of i */ + clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + if (tx_buffer->next_to_watch && + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) + && !(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_TXOFF)) { + + /* detected Tx unit hang */ +#ifdef DEBUG + adapter->tx_hang_detected = TRUE; + if (adapter->disable_hw_reset) { + DPRINTK(DRV, WARNING, + "Deactivating netdev watchdog timer\n"); + if (del_timer(&netdev_ring(tx_ring)->watchdog_timer)) + dev_put(netdev_ring(tx_ring)); +#ifndef HAVE_NET_DEVICE_OPS + netdev_ring(tx_ring)->tx_timeout = NULL; +#endif + } +#endif /* DEBUG */ + dev_err(tx_ring->dev, + "Detected Tx Unit Hang\n" + " Tx Queue <%d>\n" + " TDH <%x>\n" + " TDT <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " next_to_watch <%p>\n" + " jiffies <%lx>\n" + " desc.status <%x>\n", + tx_ring->queue_index, + E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)), + readl(tx_ring->tail), + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_buffer->time_stamp, + tx_buffer->next_to_watch, + jiffies, + tx_buffer->next_to_watch->wb.status); + if (netif_is_multiqueue(netdev_ring(tx_ring))) + netif_stop_subqueue(netdev_ring(tx_ring), + ring_queue_index(tx_ring)); + else + netif_stop_queue(netdev_ring(tx_ring)); + + /* we are about to reset, no point in enabling stuff */ + return true; + } + } + +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) + if (unlikely(total_packets && + netif_carrier_ok(netdev_ring(tx_ring)) && + igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (netif_is_multiqueue(netdev_ring(tx_ring))) { + if (__netif_subqueue_stopped(netdev_ring(tx_ring), + ring_queue_index(tx_ring)) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_subqueue(netdev_ring(tx_ring), + ring_queue_index(tx_ring)); + tx_ring->tx_stats.restart_queue++; + } + } else { + if (netif_queue_stopped(netdev_ring(tx_ring)) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_queue(netdev_ring(tx_ring)); + tx_ring->tx_stats.restart_queue++; + } + } + } + + return !!budget; +} + +#ifdef HAVE_VLAN_RX_REGISTER +/** + * igb_receive_skb - helper function to handle rx indications + * @q_vector: structure containing interrupt and ring information + * @skb: packet to send up + **/ +static void igb_receive_skb(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct vlan_group **vlgrp = netdev_priv(skb->dev); + + if (IGB_CB(skb)->vid) { + if (*vlgrp) { + vlan_gro_receive(&q_vector->napi, *vlgrp, + IGB_CB(skb)->vid, skb); + } else { + dev_kfree_skb_any(skb); + } + } else { + napi_gro_receive(&q_vector->napi, skb); + } +} + +#endif /* HAVE_VLAN_RX_REGISTER */ +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +/** + * igb_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void igb_reuse_rx_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *old_buff) +{ + struct igb_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer)); + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, + old_buff->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); +} + +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + struct page *page, + unsigned int truesize) +{ + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= IGB_RX_BUFSZ; + +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + return false; +#endif + + /* bump ref count on page before it is given to the stack */ + get_page(page); + + return true; +} + +/** + * igb_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + **/ +static bool igb_add_rx_frag(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = IGB_RX_BUFSZ; +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); +#endif + + if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + +#ifdef HAVE_PTP_1588_CLOCK + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } +#endif /* HAVE_PTP_1588_CLOCK */ + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + return igb_can_reuse_rx_page(rx_buffer, page, truesize); +} + +static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_rx_buffer *rx_buffer; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + + page = rx_buffer->page; + prefetchw(page); + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IGB_RX_HDR_LEN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + } + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* pull page into skb */ + if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + + return skb; +} + +#endif +static inline void igb_rx_checksum(struct igb_ring *ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Ignore Checksum bit is set */ + if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) + return; + + /* Rx checksum disabled via ethtool */ + if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM)) + return; + + /* TCP/UDP checksum error bit is set */ + if (igb_test_staterr(rx_desc, + E1000_RXDEXT_STATERR_TCPE | + E1000_RXDEXT_STATERR_IPE)) { + /* + * work around errata with sctp packets where the TCPE aka + * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) + * packets, (aka let the stack check the crc32c) + */ + if (!((skb->len == 60) && + test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) + ring->rx_stats.csum_err++; + + /* let the stack verify checksum errors */ + return; + } + /* It must be a TCP or UDP packet with a valid checksum */ + if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | + E1000_RXD_STAT_UDPCS)) + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +#ifdef NETIF_F_RXHASH +static inline void igb_rx_hash(struct igb_ring *ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (netdev_ring(ring)->features & NETIF_F_RXHASH) + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); +} + +#endif +#ifndef IGB_NO_LRO +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +/** + * igb_merge_active_tail - merge active tail into lro skb + * @tail: pointer to active tail in frag_list + * + * This function merges the length and data of an active tail into the + * skb containing the frag_list. It resets the tail's pointer to the head, + * but it leaves the heads pointer to tail intact. + **/ +static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail) +{ + struct sk_buff *head = IGB_CB(tail)->head; + + if (!head) + return tail; + + head->len += tail->len; + head->data_len += tail->len; + head->truesize += tail->len; + + IGB_CB(tail)->head = NULL; + + return head; +} + +/** + * igb_add_active_tail - adds an active tail into the skb frag_list + * @head: pointer to the start of the skb + * @tail: pointer to active tail to add to frag_list + * + * This function adds an active tail to the end of the frag list. This tail + * will still be receiving data so we cannot yet ad it's stats to the main + * skb. That is done via igb_merge_active_tail. + **/ +static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail) +{ + struct sk_buff *old_tail = IGB_CB(head)->tail; + + if (old_tail) { + igb_merge_active_tail(old_tail); + old_tail->next = tail; + } else { + skb_shinfo(head)->frag_list = tail; + } + + IGB_CB(tail)->head = head; + IGB_CB(head)->tail = tail; + + IGB_CB(head)->append_cnt++; +} + +/** + * igb_close_active_frag_list - cleanup pointers on a frag_list skb + * @head: pointer to head of an active frag list + * + * This function will clear the frag_tail_tracker pointer on an active + * frag_list and returns true if the pointer was actually set + **/ +static inline bool igb_close_active_frag_list(struct sk_buff *head) +{ + struct sk_buff *tail = IGB_CB(head)->tail; + + if (!tail) + return false; + + igb_merge_active_tail(tail); + + IGB_CB(head)->tail = NULL; + + return true; +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled + * @adapter: board private structure + * @rx_desc: pointer to the rx descriptor + * @skb: pointer to the skb to be merged + * + **/ +static inline bool igb_can_lro(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + + /* verify hardware indicates this is IPv4/TCP */ + if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) || + !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)))) + return false; + + /* .. and LRO is enabled */ + if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO)) + return false; + + /* .. and we are not in promiscuous mode */ + if (netdev_ring(rx_ring)->flags & IFF_PROMISC) + return false; + + /* .. and the header is large enough for us to read IP/TCP fields */ + if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr))) + return false; + + /* .. and there are no VLANs on packet */ + if (skb->protocol != __constant_htons(ETH_P_IP)) + return false; + + /* .. and we are version 4 with no options */ + if (*(u8 *)iph != 0x45) + return false; + + /* .. and the packet is not fragmented */ + if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + return false; + + /* .. and that next header is TCP */ + if (iph->protocol != IPPROTO_TCP) + return false; + + return true; +} + +static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb) +{ + return (struct igb_lrohdr *)skb->data; +} + +/** + * igb_lro_flush - Indicate packets to upper layer. + * + * Update IP and TCP header part of head skb if more than one + * skb's chained and indicate packets to upper layer. + **/ +static void igb_lro_flush(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igb_lro_list *lrolist = &q_vector->lrolist; + + __skb_unlink(skb, &lrolist->active); + + if (IGB_CB(skb)->append_cnt) { + struct igb_lrohdr *lroh = igb_lro_hdr(skb); + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* close any active lro contexts */ + igb_close_active_frag_list(skb); + +#endif + /* incorporate ip header and re-calculate checksum */ + lroh->iph.tot_len = ntohs(skb->len); + lroh->iph.check = 0; + + /* header length is 5 since we know no options exist */ + lroh->iph.check = ip_fast_csum((u8 *)lroh, 5); + + /* clear TCP checksum to indicate we are an LRO frame */ + lroh->th.check = 0; + + /* incorporate latest timestamp into the tcp header */ + if (IGB_CB(skb)->tsecr) { + lroh->ts[2] = IGB_CB(skb)->tsecr; + lroh->ts[1] = htonl(IGB_CB(skb)->tsval); + } +#ifdef NETIF_F_GSO + + skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; +#endif + } + +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif + lrolist->stats.flushed++; +} + +static void igb_lro_flush_all(struct igb_q_vector *q_vector) +{ + struct igb_lro_list *lrolist = &q_vector->lrolist; + struct sk_buff *skb, *tmp; + + skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp) + igb_lro_flush(q_vector, skb); +} + +/* + * igb_lro_header_ok - Main LRO function. + **/ +static void igb_lro_header_ok(struct sk_buff *skb) +{ + struct igb_lrohdr *lroh = igb_lro_hdr(skb); + u16 opt_bytes, data_len; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_CB(skb)->tail = NULL; +#endif + IGB_CB(skb)->tsecr = 0; + IGB_CB(skb)->append_cnt = 0; + IGB_CB(skb)->mss = 0; + + /* ensure that the checksum is valid */ + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + return; + + /* If we see CE codepoint in IP header, packet is not mergeable */ + if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph))) + return; + + /* ensure no bits set besides ack or psh */ + if (lroh->th.fin || lroh->th.syn || lroh->th.rst || + lroh->th.urg || lroh->th.ece || lroh->th.cwr || + !lroh->th.ack) + return; + + /* store the total packet length */ + data_len = ntohs(lroh->iph.tot_len); + + /* remove any padding from the end of the skb */ + __pskb_trim(skb, data_len); + + /* remove header length from data length */ + data_len -= sizeof(struct igb_lrohdr); + + /* + * check for timestamps. Since the only option we handle are timestamps, + * we only have to handle the simple case of aligned timestamps + */ + opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr); + if (opt_bytes != 0) { + if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) || + !pskb_may_pull(skb, sizeof(struct igb_lrohdr) + + TCPOLEN_TSTAMP_ALIGNED) || + (lroh->ts[0] != htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP)) || + (lroh->ts[2] == 0)) { + return; + } + + IGB_CB(skb)->tsval = ntohl(lroh->ts[1]); + IGB_CB(skb)->tsecr = lroh->ts[2]; + + data_len -= TCPOLEN_TSTAMP_ALIGNED; + } + + /* record data_len as mss for the packet */ + IGB_CB(skb)->mss = data_len; + IGB_CB(skb)->next_seq = ntohl(lroh->th.seq); +} + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb) +{ + struct skb_shared_info *sh_info; + struct skb_shared_info *new_skb_info; + unsigned int data_len; + + sh_info = skb_shinfo(lro_skb); + new_skb_info = skb_shinfo(new_skb); + + /* copy frags into the last skb */ + memcpy(sh_info->frags + sh_info->nr_frags, + new_skb_info->frags, + new_skb_info->nr_frags * sizeof(skb_frag_t)); + + /* copy size data over */ + sh_info->nr_frags += new_skb_info->nr_frags; + data_len = IGB_CB(new_skb)->mss; + lro_skb->len += data_len; + lro_skb->data_len += data_len; + lro_skb->truesize += data_len; + + /* wipe record of data from new_skb */ + new_skb_info->nr_frags = 0; + new_skb->len = new_skb->data_len = 0; + dev_kfree_skb_any(new_skb); +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_lro_receive - if able, queue skb into lro chain + * @q_vector: structure containing interrupt and ring information + * @new_skb: pointer to current skb being checked + * + * Checks whether the skb given is eligible for LRO and if that's + * fine chains it to the existing lro_skb based on flowid. If an LRO for + * the flow doesn't exist create one. + **/ +static void igb_lro_receive(struct igb_q_vector *q_vector, + struct sk_buff *new_skb) +{ + struct sk_buff *lro_skb; + struct igb_lro_list *lrolist = &q_vector->lrolist; + struct igb_lrohdr *lroh = igb_lro_hdr(new_skb); + __be32 saddr = lroh->iph.saddr; + __be32 daddr = lroh->iph.daddr; + __be32 tcp_ports = *(__be32 *)&lroh->th; + u16 data_len; +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid = IGB_CB(new_skb)->vid; +#else + u16 vid = new_skb->vlan_tci; +#endif + + igb_lro_header_ok(new_skb); + + /* + * we have a packet that might be eligible for LRO, + * so see if it matches anything we might expect + */ + skb_queue_walk(&lrolist->active, lro_skb) { + if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports || + igb_lro_hdr(lro_skb)->iph.saddr != saddr || + igb_lro_hdr(lro_skb)->iph.daddr != daddr) + continue; + +#ifdef HAVE_VLAN_RX_REGISTER + if (IGB_CB(lro_skb)->vid != vid) +#else + if (lro_skb->vlan_tci != vid) +#endif + continue; + + /* out of order packet */ + if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) { + igb_lro_flush(q_vector, lro_skb); + IGB_CB(new_skb)->mss = 0; + break; + } + + /* TCP timestamp options have changed */ + if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) { + igb_lro_flush(q_vector, lro_skb); + break; + } + + /* make sure timestamp values are increasing */ + if (IGB_CB(lro_skb)->tsecr && + IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) { + igb_lro_flush(q_vector, lro_skb); + IGB_CB(new_skb)->mss = 0; + break; + } + + data_len = IGB_CB(new_skb)->mss; + + /* Check for all of the above below + * malformed header + * no tcp data + * resultant packet would be too large + * new skb is larger than our current mss + * data would remain in header + * we would consume more frags then the sk_buff contains + * ack sequence numbers changed + * window size has changed + */ + if (data_len == 0 || + data_len > IGB_CB(lro_skb)->mss || + data_len > IGB_CB(lro_skb)->free || +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + data_len != new_skb->data_len || + skb_shinfo(new_skb)->nr_frags >= + (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) || +#endif + igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq || + igb_lro_hdr(lro_skb)->th.window != lroh->th.window) { + igb_lro_flush(q_vector, lro_skb); + break; + } + + /* Remove IP and TCP header*/ + skb_pull(new_skb, new_skb->len - data_len); + + /* update timestamp and timestamp echo response */ + IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval; + IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr; + + /* update sequence and free space */ + IGB_CB(lro_skb)->next_seq += data_len; + IGB_CB(lro_skb)->free -= data_len; + + /* update append_cnt */ + IGB_CB(lro_skb)->append_cnt++; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* if header is empty pull pages into current skb */ + igb_merge_frags(lro_skb, new_skb); +#else + /* chain this new skb in frag_list */ + igb_add_active_tail(lro_skb, new_skb); +#endif + + if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh || + skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) { + igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh; + igb_lro_flush(q_vector, lro_skb); + } + + lrolist->stats.coal++; + return; + } + + if (IGB_CB(new_skb)->mss && !lroh->th.psh) { + /* if we are at capacity flush the tail */ + if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) { + lro_skb = skb_peek_tail(&lrolist->active); + if (lro_skb) + igb_lro_flush(q_vector, lro_skb); + } + + /* update sequence and free space */ + IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss; + IGB_CB(new_skb)->free = 65521 - new_skb->len; + + /* .. and insert at the front of the active list */ + __skb_queue_head(&lrolist->active, new_skb); + + lrolist->stats.coal++; + return; + } + + /* packet not handled by any of the above, pass it to the stack */ +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, new_skb); +#else + napi_gro_receive(&q_vector->napi, new_skb); +#endif +} + +#endif /* IGB_NO_LRO */ +/** + * igb_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + **/ +static void igb_process_skb_fields(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct net_device *dev = rx_ring->netdev; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + +#ifdef NETIF_F_RXHASH + igb_rx_hash(rx_ring, rx_desc, skb); + +#endif + igb_rx_checksum(rx_ring, rx_desc, skb); + + /* update packet type stats */ + if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)) + rx_ring->rx_stats.ipv4_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX)) + rx_ring->rx_stats.ipv4e_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6)) + rx_ring->rx_stats.ipv6_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX)) + rx_ring->rx_stats.ipv6e_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) + rx_ring->rx_stats.tcp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP)) + rx_ring->rx_stats.udp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP)) + rx_ring->rx_stats.sctp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS)) + rx_ring->rx_stats.nfs_packets++; + +#ifdef HAVE_PTP_1588_CLOCK + igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && +#else + if ((dev->features & NETIF_F_HW_VLAN_RX) && +#endif + igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { + u16 vid = 0; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && + test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) + vid = be16_to_cpu(rx_desc->wb.upper.vlan); + else + vid = le16_to_cpu(rx_desc->wb.upper.vlan); +#ifdef HAVE_VLAN_RX_REGISTER + IGB_CB(skb)->vid = vid; + } else { + IGB_CB(skb)->vid = 0; +#else + +#ifdef HAVE_VLAN_PROTOCOL + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); +#else + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); +#endif + + +#endif + } + + skb_record_rx_queue(skb, rx_ring->queue_index); + + skb->protocol = eth_type_trans(skb, dev); +} + +/** + * igb_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool igb_is_non_eop(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IGB_RX_DESC(rx_ring, ntc)); + + if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) + return false; + + return true; +} + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +/* igb_clean_rx_irq -- * legacy */ +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +{ + struct igb_ring *rx_ring = q_vector->rx.ring; + unsigned int total_bytes = 0, total_packets = 0; + u16 cleaned_count = igb_desc_unused(rx_ring); + + do { + struct igb_rx_buffer *rx_buffer; + union e1000_adv_rx_desc *rx_desc; + struct sk_buff *skb; + u16 ntc; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGB_RX_BUFFER_WRITE) { + igb_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + ntc = rx_ring->next_to_clean; + rx_desc = IGB_RX_DESC(rx_ring, ntc); + rx_buffer = &rx_ring->rx_buffer_info[ntc]; + + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; + + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + skb = rx_buffer->skb; + + prefetch(skb->data); + + /* pull the header of the skb in */ + __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length)); + + /* clear skb reference in buffer info structure */ + rx_buffer->skb = NULL; + + cleaned_count++; + + BUG_ON(igb_is_non_eop(rx_ring, rx_desc)); + + dma_unmap_single(rx_ring->dev, rx_buffer->dma, + rx_ring->rx_buffer_len, + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + + if (igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + dev_kfree_skb_any(skb); + continue; + } + + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); + +#ifndef IGB_NO_LRO + if (igb_can_lro(rx_ring, rx_desc, skb)) + igb_lro_receive(q_vector, skb); + else +#endif +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif + +#ifndef NETIF_F_GRO + netdev_ring(rx_ring)->last_rx = jiffies; + +#endif + /* update budget accounting */ + total_packets++; + } while (likely(total_packets < budget)); + + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igb_alloc_rx_buffers(rx_ring, cleaned_count); + +#ifndef IGB_NO_LRO + igb_lro_flush_all(q_vector); + +#endif /* IGB_NO_LRO */ + return total_packets < budget; +} +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_get_headlen - determine size of header for LRO/GRO + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in + * + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, and GRO offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. + **/ +static unsigned int igb_get_headlen(unsigned char *data, + unsigned int max_len) +{ + union { + unsigned char *network; + /* l2 headers */ + struct ethhdr *eth; + struct vlan_hdr *vlan; + /* l3 headers */ + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } hdr; + __be16 protocol; + u8 nexthdr = 0; /* default to not TCP */ + u8 hlen; + + /* this should never happen, but better safe than sorry */ + if (max_len < ETH_HLEN) + return max_len; + + /* initialize network frame pointer */ + hdr.network = data; + + /* set first protocol and move network header forward */ + protocol = hdr.eth->h_proto; + hdr.network += ETH_HLEN; + + /* handle any vlan tag if present */ + if (protocol == __constant_htons(ETH_P_8021Q)) { + if ((hdr.network - data) > (max_len - VLAN_HLEN)) + return max_len; + + protocol = hdr.vlan->h_vlan_encapsulated_proto; + hdr.network += VLAN_HLEN; + } + + /* handle L3 protocols */ + if (protocol == __constant_htons(ETH_P_IP)) { + if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) + return max_len; + + /* access ihl as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct iphdr)) + return hdr.network - data; + + /* record next protocol if header is present */ + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) + nexthdr = hdr.ipv4->protocol; +#ifdef NETIF_F_TSO6 + } else if (protocol == __constant_htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hlen = sizeof(struct ipv6hdr); +#endif /* NETIF_F_TSO6 */ + } else { + return hdr.network - data; + } + + /* relocate pointer to start of L4 header */ + hdr.network += hlen; + + /* finally sort out TCP */ + if (nexthdr == IPPROTO_TCP) { + if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) + return max_len; + + /* access doff as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[12] & 0xF0) >> 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct tcphdr)) + return hdr.network - data; + + hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); + } + + /* + * If everything has gone correctly hdr.network should be the + * data section of the packet and will be the end of the header. + * If not then it probably represents the end of the last recognized + * header. + */ + if ((hdr.network - data) < max_len) + return hdr.network - data; + else + return max_len; +} + +/** + * igb_pull_tail - igb specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being adjusted + * + * This function is an igb specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void igb_pull_tail(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + +#ifdef HAVE_PTP_1588_CLOCK + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + /* retrieve timestamp from buffer */ + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + + /* update pointers to remove timestamp header */ + skb_frag_size_sub(frag, IGB_TS_HDR_LEN); + frag->page_offset += IGB_TS_HDR_LEN; + skb->data_len -= IGB_TS_HDR_LEN; + skb->len -= IGB_TS_HDR_LEN; + + /* move va to start of packet data */ + va += IGB_TS_HDR_LEN; + } +#endif /* HAVE_PTP_1588_CLOCK */ + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * igb_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool igb_cleanup_headers(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + igb_pull_tail(rx_ring, rx_desc, skb); + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/* igb_clean_rx_irq -- * packet split */ +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +{ + struct igb_ring *rx_ring = q_vector->rx.ring; + struct sk_buff *skb = rx_ring->skb; + unsigned int total_bytes = 0, total_packets = 0; + u16 cleaned_count = igb_desc_unused(rx_ring); + + do { + union e1000_adv_rx_desc *rx_desc; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGB_RX_BUFFER_WRITE) { + igb_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); + + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; + + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + /* retrieve a buffer from the ring */ + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + + cleaned_count++; + + /* fetch next buffer in frame if non-eop */ + if (igb_is_non_eop(rx_ring, rx_desc)) + continue; + + /* verify the packet layout is correct */ + if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); + +#ifndef IGB_NO_LRO + if (igb_can_lro(rx_ring, rx_desc, skb)) + igb_lro_receive(q_vector, skb); + else +#endif +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif +#ifndef NETIF_F_GRO + + netdev_ring(rx_ring)->last_rx = jiffies; +#endif + + /* reset skb pointer */ + skb = NULL; + + /* update budget accounting */ + total_packets++; + } while (likely(total_packets < budget)); + + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igb_alloc_rx_buffers(rx_ring, cleaned_count); + +#ifndef IGB_NO_LRO + igb_lro_flush_all(q_vector); + +#endif /* IGB_NO_LRO */ + return total_packets < budget; +} +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, + struct igb_rx_buffer *bi) +{ + struct sk_buff *skb = bi->skb; + dma_addr_t dma = bi->dma; + + if (dma) + return true; + + if (likely(!skb)) { + skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring), + rx_ring->rx_buffer_len); + bi->skb = skb; + if (!skb) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* initialize skb for ring */ + skb_record_rx_queue(skb, ring_queue_index(rx_ring)); + } + + dma = dma_map_single(rx_ring->dev, skb->data, + rx_ring->rx_buffer_len, DMA_FROM_DEVICE); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + dev_kfree_skb_any(skb); + bi->skb = NULL; + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + return true; +} + +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = alloc_page(GFP_ATOMIC | __GFP_COLD); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + + return true; +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_alloc_rx_buffers - Replace used receive buffers; packet split + * @adapter: address of board private structure + **/ +void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) +{ + union e1000_adv_rx_desc *rx_desc; + struct igb_rx_buffer *bi; + u16 i = rx_ring->next_to_use; + + /* nothing to do */ + if (!cleaned_count) + return; + + rx_desc = IGB_RX_DESC(rx_ring, i); + bi = &rx_ring->rx_buffer_info[i]; + i -= rx_ring->count; + + do { +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (!igb_alloc_mapped_skb(rx_ring, bi)) +#else + if (!igb_alloc_mapped_page(rx_ring, bi)) +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + break; + + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); +#else + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); +#endif + + rx_desc++; + bi++; + i++; + if (unlikely(!i)) { + rx_desc = IGB_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buffer_info; + i -= rx_ring->count; + } + + /* clear the hdr_addr for the next_to_use descriptor */ + rx_desc->read.hdr_addr = 0; + + cleaned_count--; + } while (cleaned_count); + + i += rx_ring->count; + + if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ + rx_ring->next_to_use = i; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + +#endif + /* + * Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, rx_ring->tail); + } +} + +#ifdef SIOCGMIIPHY +/** + * igb_mii_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct mii_ioctl_data *data = if_mii(ifr); + + if (adapter->hw.phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = adapter->hw.phy.addr; + break; + case SIOCGMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, + &data->val_out)) + return -EIO; + break; + case SIOCSMIIREG: + default: + return -EOPNOTSUPP; + } + return E1000_SUCCESS; +} + +#endif +/** + * igb_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { +#ifdef SIOCGMIIPHY + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + return igb_mii_ioctl(netdev, ifr, cmd); +#endif +#ifdef HAVE_PTP_1588_CLOCK + case SIOCSHWTSTAMP: + return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); +#endif /* HAVE_PTP_1588_CLOCK */ +#ifdef ETHTOOL_OPS_COMPAT + case SIOCETHTOOL: + return ethtool_ioctl(ifr); +#endif + default: + return -EOPNOTSUPP; + } +} + +s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -E1000_ERR_CONFIG; + + pci_read_config_word(adapter->pdev, cap_offset + reg, value); + + return E1000_SUCCESS; +} + +s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -E1000_ERR_CONFIG; + + pci_write_config_word(adapter->pdev, cap_offset + reg, *value); + + return E1000_SUCCESS; +} + +#ifdef HAVE_VLAN_RX_REGISTER +static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp) +#else +void igb_vlan_mode(struct net_device *netdev, u32 features) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, rctl; + int i; +#ifdef HAVE_VLAN_RX_REGISTER + bool enable = !!vlgrp; + + igb_irq_disable(adapter); + + adapter->vlgrp = vlgrp; + + if (!test_bit(__IGB_DOWN, &adapter->state)) + igb_irq_enable(adapter); +#else +#ifdef NETIF_F_HW_VLAN_CTAG_RX + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); +#else + bool enable = !!(features & NETIF_F_HW_VLAN_RX); +#endif +#endif + + if (enable) { + /* enable VLAN tag insert/strip */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Disable CFI check */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~E1000_RCTL_CFIEN; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } else { + /* disable VLAN tag insert/strip */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + } + +#ifndef CONFIG_IGB_VMDQ_NETDEV + for (i = 0; i < adapter->vmdq_pools; i++) { + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count + i, + enable); + } + +#else + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count, + enable); + + for (i = 1; i < adapter->vmdq_pools; i++) { +#ifdef HAVE_VLAN_RX_REGISTER + struct igb_vmdq_adapter *vadapter; + vadapter = netdev_priv(adapter->vmdq_netdev[i-1]); + enable = !!vadapter->vlgrp; +#else + struct net_device *vnetdev; + vnetdev = adapter->vmdq_netdev[i-1]; +#ifdef NETIF_F_HW_VLAN_CTAG_RX + enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX); +#else + enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX); +#endif +#endif + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count + i, + enable); + } + +#endif + igb_rlpml_set(adapter); +} + +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_add_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +#else +static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +#endif +#else +static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int pf_id = adapter->vfs_allocated_count; + + /* attempt to add filter to vlvf array */ + igb_vlvf_set(adapter, vid, TRUE, pf_id); + + /* add the filter since PF can receive vlans w/o entry in vlvf */ + igb_vfta_set(adapter, vid, TRUE); +#ifndef HAVE_NETDEV_VLAN_FEATURES + + /* Copy feature flags from netdev to the vlan netdev for this vid. + * This allows things like TSO to bubble down to our vlan device. + * There is no need to update netdev for vlan 0 (DCB), since it + * wouldn't has v_netdev. + */ + if (adapter->vlgrp) { + struct vlan_group *vlgrp = adapter->vlgrp; + struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid); + if (v_netdev) { + v_netdev->features |= netdev->features; + vlan_group_set_device(vlgrp, vid, v_netdev); + } + } +#endif +#ifndef HAVE_VLAN_RX_REGISTER + + set_bit(vid, adapter->active_vlans); +#endif +#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID + return 0; +#endif +} + +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_kill_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +#else +static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +#endif +#else +static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int pf_id = adapter->vfs_allocated_count; + s32 err; + +#ifdef HAVE_VLAN_RX_REGISTER + igb_irq_disable(adapter); + + vlan_group_set_device(adapter->vlgrp, vid, NULL); + + if (!test_bit(__IGB_DOWN, &adapter->state)) + igb_irq_enable(adapter); + +#endif /* HAVE_VLAN_RX_REGISTER */ + /* remove vlan from VLVF table array */ + err = igb_vlvf_set(adapter, vid, FALSE, pf_id); + + /* if vid was not present in VLVF just remove it from table */ + if (err) + igb_vfta_set(adapter, vid, FALSE); +#ifndef HAVE_VLAN_RX_REGISTER + + clear_bit(vid, adapter->active_vlans); +#endif +#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID + return 0; +#endif +} + +static void igb_restore_vlan(struct igb_adapter *adapter) +{ +#ifdef HAVE_VLAN_RX_REGISTER + igb_vlan_mode(adapter->netdev, adapter->vlgrp); + + if (adapter->vlgrp) { + u16 vid; + for (vid = 0; vid < VLAN_N_VID; vid++) { + if (!vlan_group_get_device(adapter->vlgrp, vid)) + continue; +#ifdef NETIF_F_HW_VLAN_CTAG_RX + igb_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); +#else + igb_vlan_rx_add_vid(adapter->netdev, vid); +#endif + } + } +#else + u16 vid; + + igb_vlan_mode(adapter->netdev, adapter->netdev->features); + + for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) +#ifdef NETIF_F_HW_VLAN_CTAG_RX + igb_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); +#else + igb_vlan_rx_add_vid(adapter->netdev, vid); +#endif +#endif +} + +int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_mac_info *mac = &adapter->hw.mac; + + mac->autoneg = 0; + + /* SerDes device's does not support 10Mbps Full/duplex + * and 100Mbps Half duplex + */ + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + switch (spddplx) { + case SPEED_10 + DUPLEX_HALF: + case SPEED_10 + DUPLEX_FULL: + case SPEED_100 + DUPLEX_HALF: + dev_err(pci_dev_to_dev(pdev), + "Unsupported Speed/Duplex configuration\n"); + return -EINVAL; + default: + break; + } + } + + switch (spddplx) { + case SPEED_10 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_10_HALF; + break; + case SPEED_10 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_10_FULL; + break; + case SPEED_100 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_100_HALF; + break; + case SPEED_100 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_100_FULL; + break; + case SPEED_1000 + DUPLEX_FULL: + mac->autoneg = 1; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case SPEED_1000 + DUPLEX_HALF: /* not supported */ + default: + dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n"); + return -EINVAL; + } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + + return 0; +} + +static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, + bool runtime) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, rctl, status; + u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; +#ifdef CONFIG_PM + int retval = 0; +#endif + + netif_device_detach(netdev); + + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) + wufc &= ~E1000_WUFC_LNKC; + + if (netif_running(netdev)) + __igb_close(netdev, true); + + igb_clear_interrupt_scheme(adapter); + +#ifdef CONFIG_PM + retval = pci_save_state(pdev); + if (retval) + return retval; +#endif + + if (wufc) { + igb_setup_rctl(adapter); + igb_set_rx_mode(netdev); + + /* turn on all-multi mode if wake on multicast is enabled */ + if (wufc & E1000_WUFC_MC) { + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + /* phy power management enable */ + #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 + ctrl |= E1000_CTRL_ADVD3WUC; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Allow time for pending master requests to run */ + e1000_disable_pcie_master(hw); + + E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(hw, E1000_WUFC, wufc); + } else { + E1000_WRITE_REG(hw, E1000_WUC, 0); + E1000_WRITE_REG(hw, E1000_WUFC, 0); + } + + *enable_wake = wufc || adapter->en_mng_pt; + if (!*enable_wake) + igb_power_down_link(adapter); + else + igb_power_up_link(adapter); + + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + igb_release_hw_control(adapter); + + pci_disable_device(pdev); + + return 0; +} + +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_suspend(struct device *dev) +#else +static int igb_suspend(struct pci_dev *pdev, pm_message_t state) +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +{ +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + struct pci_dev *pdev = to_pci_dev(dev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ + int retval; + bool wake; + + retval = __igb_shutdown(pdev, &wake, 0); + if (retval) + return retval; + + if (wake) { + pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_resume(struct device *dev) +#else +static int igb_resume(struct pci_dev *pdev) +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +{ +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + struct pci_dev *pdev = to_pci_dev(dev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_save_state(pdev); + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "igb: Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + igb_reset(adapter); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + E1000_WRITE_REG(hw, E1000_WUS, ~0); + + if (netdev->flags & IFF_UP) { + rtnl_lock(); + err = __igb_open(netdev, true); + rtnl_unlock(); + if (err) + return err; + } + + netif_device_attach(netdev); + + return 0; +} + +#ifdef CONFIG_PM_RUNTIME +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + + if (!igb_has_link(adapter)) + pm_schedule_suspend(dev, MSEC_PER_SEC * 5); + + return -EBUSY; +} + +static int igb_runtime_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int retval; + bool wake; + + retval = __igb_shutdown(pdev, &wake, 1); + if (retval) + return retval; + + if (wake) { + pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +static int igb_runtime_resume(struct device *dev) +{ + return igb_resume(dev); +} +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM_RUNTIME */ +#endif /* CONFIG_PM */ + +#ifdef USE_REBOOT_NOTIFIER +/* only want to do this for 2.4 kernels? */ +static int igb_notify_reboot(struct notifier_block *nb, unsigned long event, + void *p) +{ + struct pci_dev *pdev = NULL; + bool wake; + + switch (event) { + case SYS_DOWN: + case SYS_HALT: + case SYS_POWER_OFF: + while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { + if (pci_dev_driver(pdev) == &igb_driver) { + __igb_shutdown(pdev, &wake, 0); + if (event == SYS_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } + } + } + } + return NOTIFY_DONE; +} +#else +static void igb_shutdown(struct pci_dev *pdev) +{ + bool wake = false; + + __igb_shutdown(pdev, &wake, 0); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } +} +#endif /* USE_REBOOT_NOTIFIER */ + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* + * Polling 'interrupt' - used by things like netconsole to send skbs + * without having to re-enable interrupts. It's not called while + * the interrupt routine is executing. + */ +static void igb_netpoll(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct igb_q_vector *q_vector; + int i; + + for (i = 0; i < adapter->num_q_vectors; i++) { + q_vector = adapter->q_vector[i]; + if (adapter->msix_entries) + E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value); + else + igb_irq_disable(adapter); + napi_schedule(&q_vector->napi); + } +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +#ifdef HAVE_PCI_ERS +#define E1000_DEV_ID_82576_VF 0x10CA +/** + * igb_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * This function is called after a PCI bus error affecting + * this device has been detected. + */ +static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + +#ifdef CONFIG_PCI_IOV__UNUSED + struct pci_dev *bdev, *vfdev; + u32 dw0, dw1, dw2, dw3; + int vf, pos; + u16 req_id, pf_func; + + if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA)) + goto skip_bad_vf_detection; + + bdev = pdev->bus->self; + while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT)) + bdev = bdev->bus->self; + + if (!bdev) + goto skip_bad_vf_detection; + + pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR); + if (!pos) + goto skip_bad_vf_detection; + + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3); + + req_id = dw1 >> 16; + /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */ + if (!(req_id & 0x0080)) + goto skip_bad_vf_detection; + + pf_func = req_id & 0x01; + if ((pf_func & 1) == (pdev->devfn & 1)) { + + vf = (req_id & 0x7F) >> 1; + dev_err(pci_dev_to_dev(pdev), + "VF %d has caused a PCIe error\n", vf); + dev_err(pci_dev_to_dev(pdev), + "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " + "%8.8x\tdw3: %8.8x\n", + dw0, dw1, dw2, dw3); + + /* Find the pci device of the offending VF */ + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + E1000_DEV_ID_82576_VF, NULL); + while (vfdev) { + if (vfdev->devfn == (req_id & 0xFF)) + break; + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + E1000_DEV_ID_82576_VF, vfdev); + } + /* + * There's a slim chance the VF could have been hot plugged, + * so if it is no longer present we don't need to issue the + * VFLR. Just clean up the AER in that case. + */ + if (vfdev) { + dev_err(pci_dev_to_dev(pdev), + "Issuing VFLR to VF %d\n", vf); + pci_write_config_dword(vfdev, 0xA8, 0x00008000); + } + + pci_cleanup_aer_uncorrect_error_status(pdev); + } + + /* + * Even though the error may have occurred on the other port + * we still need to increment the vf error reference count for + * both ports because the I/O resume function will be called + * for both of them. + */ + adapter->vferr_refcount++; + + return PCI_ERS_RESULT_RECOVERED; + +skip_bad_vf_detection: +#endif /* CONFIG_PCI_IOV */ + + netif_device_detach(netdev); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + if (netif_running(netdev)) + igb_down(adapter); + pci_disable_device(pdev); + + /* Request a slot slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * igb_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Restart the card from scratch, as if from a cold-boot. Implementation + * resembles the first-half of the igb_resume routine. + */ +static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + pci_ers_result_t result; + + if (pci_enable_device_mem(pdev)) { + dev_err(pci_dev_to_dev(pdev), + "Cannot re-enable PCI device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); + + schedule_work(&adapter->reset_task); + E1000_WRITE_REG(hw, E1000_WUS, ~0); + result = PCI_ERS_RESULT_RECOVERED; + } + + pci_cleanup_aer_uncorrect_error_status(pdev); + + return result; +} + +/** + * igb_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. Implementation resembles the + * second-half of the igb_resume routine. + */ +static void igb_io_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + + if (adapter->vferr_refcount) { + dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n"); + adapter->vferr_refcount--; + return; + } + + if (netif_running(netdev)) { + if (igb_up(adapter)) { + dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n"); + return; + } + } + + netif_device_attach(netdev); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); +} + +#endif /* HAVE_PCI_ERS */ + +int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return 0; + + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED | + IGB_MAC_STATE_IN_USE); + memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); + adapter->mac_table[i].queue = queue; + igb_sync_mac_table(adapter); + return 0; + } + return -ENOMEM; +} +int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct e1000_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return 0; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + igb_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} +static int igb_set_vf_mac(struct igb_adapter *adapter, + int vf, unsigned char *mac_addr) +{ + igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf); + memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); + + igb_add_mac_filter(adapter, mac_addr, vf); + + return 0; +} + +#ifdef IFLA_VF_MAX +static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) + return -EINVAL; + adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); + dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" + " change effective.\n"); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," + " but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, "Bring the PF device up before" + " attempting to use the VF device.\n"); + } + return igb_set_vf_mac(adapter, vf, mac); +} + +static int igb_link_mbps(int internal_link_speed) +{ + switch (internal_link_speed) { + case SPEED_100: + return 100; + case SPEED_1000: + return 1000; + case SPEED_2500: + return 2500; + default: + return 0; + } +} + +static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, + int link_speed) +{ + int rf_dec, rf_int; + u32 bcnrc_val; + + if (tx_rate != 0) { + /* Calculate the rate factor values to set */ + rf_int = link_speed / tx_rate; + rf_dec = (link_speed - (rf_int * tx_rate)); + rf_dec = (rf_dec * (1<vf_rate_link_speed == 0) || + (adapter->hw.mac.type != e1000_82576)) + return; + + actual_link_speed = igb_link_mbps(adapter->link_speed); + if (actual_link_speed != adapter->vf_rate_link_speed) { + reset_rate = true; + adapter->vf_rate_link_speed = 0; + dev_info(&adapter->pdev->dev, + "Link speed has been changed. VF Transmit rate is disabled\n"); + } + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + if (reset_rate) + adapter->vf_data[i].tx_rate = 0; + + igb_set_vf_rate_limit(&adapter->hw, i, + adapter->vf_data[i].tx_rate, actual_link_speed); + } +} + +#ifdef HAVE_VF_MIN_MAX_TXRATE +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int tx_rate) +#else /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +#endif /* HAVE_VF_MIN_MAX_TXRATE */ +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + int actual_link_speed; + + if (hw->mac.type != e1000_82576) + return -EOPNOTSUPP; + +#ifdef HAVE_VF_MIN_MAX_TXRATE + if (min_tx_rate) + return -EINVAL; +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + + actual_link_speed = igb_link_mbps(adapter->link_speed); + if ((vf >= adapter->vfs_allocated_count) || + (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) || + (tx_rate < 0) || (tx_rate > actual_link_speed)) + return -EINVAL; + + adapter->vf_rate_link_speed = actual_link_speed; + adapter->vf_data[vf].tx_rate = (u16)tx_rate; + igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + + return 0; +} + +static int igb_ndo_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivi) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + ivi->vf = vf; + memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); +#ifdef HAVE_VF_MIN_MAX_TXRATE + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; +#else /* HAVE_VF_MIN_MAX_TXRATE */ + ivi->tx_rate = adapter->vf_data[vf].tx_rate; +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + ivi->vlan = adapter->vf_data[vf].pf_vlan; + ivi->qos = adapter->vf_data[vf].pf_qos; +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; +#endif + return 0; +} +#endif +static void igb_vmm_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int count; + u32 reg; + + switch (hw->mac.type) { + case e1000_82575: + default: + /* replication is not supported for 82575 */ + return; + case e1000_82576: + /* notify HW that the MAC is adding vlan tags */ + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg |= (E1000_DTXCTL_VLAN_ADDED | + E1000_DTXCTL_SPOOF_INT); + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); + case e1000_82580: + /* enable replication vlan tag stripping */ + reg = E1000_READ_REG(hw, E1000_RPLOLR); + reg |= E1000_RPLOLR_STRVLAN; + E1000_WRITE_REG(hw, E1000_RPLOLR, reg); + case e1000_i350: + case e1000_i354: + /* none of the above registers are supported by i350 */ + break; + } + + /* Enable Malicious Driver Detection */ + if ((adapter->vfs_allocated_count) && + (adapter->mdd)) { + if (hw->mac.type == e1000_i350) + igb_enable_mdd(adapter); + } + + /* enable replication and loopback support */ + count = adapter->vfs_allocated_count || adapter->vmdq_pools; + if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count) + e1000_vmdq_set_loopback_pf(hw, 1); + e1000_vmdq_set_anti_spoofing_pf(hw, + adapter->vfs_allocated_count || adapter->vmdq_pools, + adapter->vfs_allocated_count); + e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count || + adapter->vmdq_pools); +} + +static void igb_init_fw(struct igb_adapter *adapter) +{ + struct e1000_fw_drv_info fw_cmd; + struct e1000_hw *hw = &adapter->hw; + int i; + u16 mask; + + if (hw->mac.type == e1000_i210) + mask = E1000_SWFW_EEP_SM; + else + mask = E1000_SWFW_PHY0_SM; + /* i211 parts do not support this feature */ + if (hw->mac.type == e1000_i211) + hw->mac.arc_subsystem_valid = false; + + if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) { + for (i = 0; i <= FW_MAX_RETRIES; i++) { + E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI); + fw_cmd.hdr.cmd = FW_CMD_DRV_INFO; + fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED; + fw_cmd.port_num = hw->bus.func; + fw_cmd.drv_version = FW_FAMILY_DRV_VER; + fw_cmd.hdr.checksum = 0; + fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd, + (FW_HDR_LEN + + fw_cmd.hdr.buf_len)); + e1000_host_interface_command(hw, (u8*)&fw_cmd, + sizeof(fw_cmd)); + if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS) + break; + } + } else + dev_warn(pci_dev_to_dev(adapter->pdev), + "Unable to get semaphore, firmware init failed.\n"); + hw->mac.ops.release_swfw_sync(hw, mask); +} + +static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) +{ + struct e1000_hw *hw = &adapter->hw; + u32 dmac_thr; + u16 hwm; + u32 status; + + if (hw->mac.type == e1000_i211) + return; + + if (hw->mac.type > e1000_82580) { + if (adapter->dmac != IGB_DMAC_DISABLE) { + u32 reg; + + /* force threshold to 0. */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); + + /* + * DMA Coalescing high water mark needs to be greater + * than the Rx threshold. Set hwm to PBA - max frame + * size in 16B units, capping it at PBA - 6KB. + */ + hwm = 64 * pba - adapter->max_frame_size / 16; + if (hwm < 64 * (pba - 6)) + hwm = 64 * (pba - 6); + reg = E1000_READ_REG(hw, E1000_FCRTC); + reg &= ~E1000_FCRTC_RTH_COAL_MASK; + reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) + & E1000_FCRTC_RTH_COAL_MASK); + E1000_WRITE_REG(hw, E1000_FCRTC, reg); + + /* + * Set the DMA Coalescing Rx threshold to PBA - 2 * max + * frame size, capping it at PBA - 10KB. + */ + dmac_thr = pba - adapter->max_frame_size / 512; + if (dmac_thr < pba - 10) + dmac_thr = pba - 10; + reg = E1000_READ_REG(hw, E1000_DMACR); + reg &= ~E1000_DMACR_DMACTHR_MASK; + reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) + & E1000_DMACR_DMACTHR_MASK); + + /* transition to L0x or L1 if available..*/ + reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); + + /* Check if status is 2.5Gb backplane connection + * before configuration of watchdog timer, which is + * in msec values in 12.8usec intervals + * watchdog timer= msec values in 32usec intervals + * for non 2.5Gb connection + */ + if (hw->mac.type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= ((adapter->dmac * 5) >> 6); + else + reg |= ((adapter->dmac) >> 5); + } else { + reg |= ((adapter->dmac) >> 5); + } + + /* + * Disable BMC-to-OS Watchdog enable + * on devices that support OS-to-BMC + */ + if (hw->mac.type != e1000_i354) + reg &= ~E1000_DMACR_DC_BMC2OSW_EN; + E1000_WRITE_REG(hw, E1000_DMACR, reg); + + /* no lower threshold to disable coalescing(smart fifb)-UTRESH=0*/ + E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); + + /* This sets the time to wait before requesting + * transition to low power state to number of usecs + * needed to receive 1 512 byte frame at gigabit + * line rate. On i350 device, time to make transition + * to Lx state is delayed by 4 usec with flush disable + * bit set to avoid losing mailbox interrupts + */ + reg = E1000_READ_REG(hw, E1000_DMCTLX); + if (hw->mac.type == e1000_i350) + reg |= IGB_DMCTLX_DCFLUSH_DIS; + + /* in 2.5Gb connection, TTLX unit is 0.4 usec + * which is 0x4*2 = 0xA. But delay is still 4 usec + */ + if (hw->mac.type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= 0xA; + else + reg |= 0x4; + } else { + reg |= 0x4; + } + E1000_WRITE_REG(hw, E1000_DMCTLX, reg); + + /* free space in tx packet buffer to wake from DMA coal */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - + (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + + /* make low power state decision controlled by DMA coal */ + reg = E1000_READ_REG(hw, E1000_PCIEMISC); + reg &= ~E1000_PCIEMISC_LX_DECISION; + E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); + } /* endif adapter->dmac is not disabled */ + } else if (hw->mac.type == e1000_82580) { + u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); + E1000_WRITE_REG(hw, E1000_PCIEMISC, + reg & ~E1000_PCIEMISC_LX_DECISION); + E1000_WRITE_REG(hw, E1000_DMACR, 0); + } +} + +#ifdef HAVE_I2C_SUPPORT +/* igb_read_i2c_byte - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @dev_addr: device address + * @data: value read + * + * Performs byte read operation over I2C interface at + * a specified device address. + */ +s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = 0; + + if (!this_client) + return E1000_ERR_I2C; + + swfw_mask = E1000_SWFW_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != E1000_SUCCESS) + return E1000_ERR_SWFW_SYNC; + + status = i2c_smbus_read_byte_data(this_client, byte_offset); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status < 0) + return E1000_ERR_I2C; + else { + *data = status; + return E1000_SUCCESS; + } +} + +/* igb_write_i2c_byte - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: value to write + * + * Performs byte write operation over I2C interface at + * a specified device address. + */ +s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = E1000_SWFW_PHY0_SM; + + if (!this_client) + return E1000_ERR_I2C; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) + return E1000_ERR_SWFW_SYNC; + status = i2c_smbus_write_byte_data(this_client, byte_offset, data); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status) + return E1000_ERR_I2C; + else + return E1000_SUCCESS; +} +#endif /* HAVE_I2C_SUPPORT */ +/* igb_main.c */ + + +/** + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +int igb_kni_probe(struct pci_dev *pdev, + struct net_device **lad_dev) +{ + struct net_device *netdev; + struct igb_adapter *adapter; + struct e1000_hw *hw; + u16 eeprom_data = 0; + u8 pba_str[E1000_PBANUM_LENGTH]; + s32 ret_val; + static int global_quad_port_a; /* global quad port a indication */ + int i, err, pci_using_dac = 0; + static int cards_found; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + +#ifdef NO_KNI + pci_using_dac = 0; + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + IGB_ERR("No usable DMA configuration, " + "aborting\n"); + goto err_dma; + } + } + } + +#ifndef HAVE_ASPM_QUIRKS + /* 82575 requires that the pci-e link partner disable the L0s state */ + switch (pdev->device) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + default: + break; + } + +#endif /* HAVE_ASPM_QUIRKS */ + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igb_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; +#endif /* NO_KNI */ +#ifdef HAVE_TX_MQ + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_MAX_TX_QUEUES); +#else + netdev = alloc_etherdev(sizeof(struct igb_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) + goto err_alloc_etherdev; + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + //pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = (1 << debug) - 1; + +#ifdef HAVE_PCI_ERS + err = pci_save_state(pdev); + if (err) + goto err_ioremap; +#endif + err = -EIO; + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) + goto err_ioremap; + +#ifdef HAVE_NET_DEVICE_OPS + netdev->netdev_ops = &igb_netdev_ops; +#else /* HAVE_NET_DEVICE_OPS */ + netdev->open = &igb_open; + netdev->stop = &igb_close; + netdev->get_stats = &igb_get_stats; +#ifdef HAVE_SET_RX_MODE + netdev->set_rx_mode = &igb_set_rx_mode; +#endif + netdev->set_multicast_list = &igb_set_rx_mode; + netdev->set_mac_address = &igb_set_mac; + netdev->change_mtu = &igb_change_mtu; + netdev->do_ioctl = &igb_ioctl; +#ifdef HAVE_TX_TIMEOUT + netdev->tx_timeout = &igb_tx_timeout; +#endif + netdev->vlan_rx_register = igb_vlan_mode; + netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; + netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = igb_netpoll; +#endif + netdev->hard_start_xmit = &igb_xmit_frame; +#endif /* HAVE_NET_DEVICE_OPS */ + igb_set_ethtool_ops(netdev); +#ifdef HAVE_TX_TIMEOUT + netdev->watchdog_timeo = 5 * HZ; +#endif + + strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = igb_sw_init(adapter); + if (err) + goto err_sw_init; + + e1000_get_bus_info(hw); + + hw->phy.autoneg_wait_to_complete = FALSE; + hw->mac.adaptive_ifs = FALSE; + + /* Copper options */ + if (hw->phy.media_type == e1000_media_type_copper) { + hw->phy.mdix = AUTO_ALL_MODES; + hw->phy.disable_polarity_correction = FALSE; + hw->phy.ms_type = e1000_ms_hw_default; + } + + if (e1000_check_reset_block(hw)) + dev_info(pci_dev_to_dev(pdev), + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* + * features is initialized to 0 in allocation, it might have bits + * set by igb_sw_init so we should use an or instead of an + * assignment. + */ + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM | +#ifdef NETIF_F_IPV6_CSUM + NETIF_F_IPV6_CSUM | +#endif +#ifdef NETIF_F_TSO + NETIF_F_TSO | +#ifdef NETIF_F_TSO6 + NETIF_F_TSO6 | +#endif +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_RXHASH + NETIF_F_RXHASH | +#endif + NETIF_F_RXCSUM | +#ifdef NETIF_F_HW_VLAN_CTAG_RX + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; +#else + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_TX; +#endif + + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CSUM; + +#ifdef HAVE_NDO_SET_FEATURES + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; +#ifndef IGB_NO_LRO + + /* give us the option of enabling LRO later */ + netdev->hw_features |= NETIF_F_LRO; +#endif +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif +#endif + + /* set this bit last since it cannot be part of hw_features */ +#ifdef NETIF_F_HW_VLAN_CTAG_FILTER + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +#else + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + +#endif + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + +#ifdef NO_KNI + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); +#ifdef DEBUG + if (adapter->dmac != IGB_DMAC_DISABLE) + printk("%s: DMA Coalescing is enabled..\n", netdev->name); +#endif + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state */ + e1000_reset_hw(hw); +#endif /* NO_KNI */ + + /* make sure the NVM is good */ + if (e1000_validate_nvm_checksum(hw) < 0) { + dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" + " Valid\n"); + err = -EIO; + goto err_eeprom; + } + + /* copy the MAC address out of the NVM */ + if (e1000_read_mac_addr(hw)) + dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->perm_addr)) { +#else + if (!is_valid_ether_addr(netdev->dev_addr)) { +#endif + dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); + adapter->mac_table[0].queue = adapter->vfs_allocated_count; + adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); + igb_rar_set(adapter, 0); + + /* get firmware version for ethtool -i */ + igb_set_fw_version(adapter); + + /* Check if Media Autosense is enabled */ + if (hw->mac.type == e1000_82580) + igb_init_mas(adapter); + +#ifdef NO_KNI + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); + + INIT_WORK(&adapter->reset_task, igb_reset_task); + INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); +#endif + + /* Initialize link properties that are user-changeable */ + adapter->fc_autoneg = true; + hw->mac.autoneg = true; + hw->phy.autoneg_advertised = 0x2f; + + hw->fc.requested_mode = e1000_fc_default; + hw->fc.current_mode = e1000_fc_default; + + e1000_validate_mdi_setting(hw); + + /* By default, support wake on port A */ + if (hw->bus.func == 0) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* Check the NVM for wake support for non-port A ports */ + if (hw->mac.type >= e1000_82580) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); + else if (hw->bus.func == 1) + e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + + if (eeprom_data & IGB_EEPROM_APME) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* now that we have the eeprom settings, apply the special cases where + * the eeprom may be wrong or the board simply won't support wake on + * lan on a particular port */ + switch (pdev->device) { + case E1000_DEV_ID_82575GB_QUAD_COPPER: + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + default: + /* If the device can't wake, don't set software support */ + if (!device_can_wakeup(&adapter->pdev->dev)) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + } + + /* initialize the wol settings based on the eeprom settings */ + if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) + adapter->wol |= E1000_WUFC_MAG; + + /* Some vendors want WoL disabled by default, but still supported */ + if ((hw->mac.type == e1000_i350) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + +#ifdef NO_KNI + device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), + adapter->flags & IGB_FLAG_WOL_SUPPORTED); + + /* reset the hardware with the new settings */ + igb_reset(adapter); + adapter->devrc = 0; + +#ifdef HAVE_I2C_SUPPORT + /* Init the I2C interface */ + err = igb_init_i2c(adapter); + if (err) { + dev_err(&pdev->dev, "failed to init i2c interface\n"); + goto err_eeprom; + } +#endif /* HAVE_I2C_SUPPORT */ + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + strncpy(netdev->name, "eth%d", IFNAMSIZ); + err = register_netdev(netdev); + if (err) + goto err_register; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + err = igb_init_vmdq_netdevs(adapter); + if (err) + goto err_register; +#endif + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + +#ifdef IGB_DCA + if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + } + +#endif +#ifdef HAVE_PTP_1588_CLOCK + /* do hw tstamp init after resetting */ + igb_ptp_init(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + +#endif /* NO_KNI */ + dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); + /* print bus type/speed/width info */ + dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown")); + dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); + for (i = 0; i < 6; i++) + printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); + if (ret_val) + strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); + dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, + pba_str); + + + /* Initialize the thermal sensor on i350 devices. */ + if (hw->mac.type == e1000_i350) { + if (hw->bus.func == 0) { + u16 ets_word; + + /* + * Read the NVM to determine if this i350 device + * supports an external thermal sensor. + */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); + if (ets_word != 0x0000 && ets_word != 0xFFFF) + adapter->ets = true; + else + adapter->ets = false; + } +#ifdef NO_KNI +#ifdef IGB_HWMON + + igb_sysfs_init(adapter); +#else +#ifdef IGB_PROCFS + + igb_procfs_init(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ +#endif /* NO_KNI */ + } else { + adapter->ets = false; + } + + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = e1000_set_eee_i350(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + break; + case e1000_i354: + if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & + (E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = e1000_set_eee_i354(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + } + break; + default: + break; + } + } + + /* send driver version info to firmware */ + if (hw->mac.type >= e1000_i350) + igb_init_fw(adapter); + +#ifndef IGB_NO_LRO + if (netdev->features & NETIF_F_LRO) + dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); + else + dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); +#endif + dev_info(pci_dev_to_dev(pdev), + "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", + adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", + adapter->num_rx_queues, adapter->num_tx_queues); + + cards_found++; + *lad_dev = netdev; + + pm_runtime_put_noidle(&pdev->dev); + return 0; + +//err_register: +// igb_release_hw_control(adapter); +#ifdef HAVE_I2C_SUPPORT + memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); +#endif /* HAVE_I2C_SUPPORT */ +err_eeprom: +// if (!e1000_check_reset_block(hw)) +// e1000_phy_hw_reset(hw); + + if (hw->flash_address) + iounmap(hw->flash_address); +err_sw_init: +// igb_clear_interrupt_scheme(adapter); +// igb_reset_sriov_capability(adapter); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: +// pci_release_selected_regions(pdev, +// pci_select_bars(pdev, IORESOURCE_MEM)); +//err_pci_reg: +//err_dma: + pci_disable_device(pdev); + return err; +} + + +void igb_kni_remove(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c new file mode 100644 index 00000000..f79ce7c1 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c @@ -0,0 +1,847 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#include + +#include "igb.h" + +/* This is the only thing that needs to be changed to adjust the + * maximum number of ports that the driver can manage. + */ + +#define IGB_MAX_NIC 32 + +#define OPTION_UNSET -1 +#define OPTION_DISABLED 0 +#define OPTION_ENABLED 1 +#define MAX_NUM_LIST_OPTS 15 + +/* All parameters are treated the same, as an integer array of values. + * This macro just reduces the need to repeat the same declaration code + * over and over (plus this helps to avoid typo bugs). + */ + +#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET } +#ifndef module_param_array +/* Module Parameters are always initialized to -1, so that the driver + * can tell the difference between no user specified value or the + * user asking for the default value. + * The true default values are loaded in when igb_check_options is called. + * + * This is a GCC extension to ANSI C. + * See the item "Labeled Elements in Initializers" in the section + * "Extensions to the C Language Family" of the GCC documentation. + */ + +#define IGB_PARAM(X, desc) \ + static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ + MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \ + MODULE_PARM_DESC(X, desc); +#else +#define IGB_PARAM(X, desc) \ + static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ + static unsigned int num_##X; \ + module_param_array_named(X, X, int, &num_##X, 0); \ + MODULE_PARM_DESC(X, desc); +#endif + +/* Interrupt Throttle Rate (interrupts/sec) + * + * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) + */ +IGB_PARAM(InterruptThrottleRate, + "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive"); +#define DEFAULT_ITR 3 +#define MAX_ITR 100000 +/* #define MIN_ITR 120 */ +#define MIN_ITR 0 +/* IntMode (Interrupt Mode) + * + * Valid Range: 0 - 2 + * + * Default Value: 2 (MSI-X) + */ +IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2"); +#define MAX_INTMODE IGB_INT_MODE_MSIX +#define MIN_INTMODE IGB_INT_MODE_LEGACY + +IGB_PARAM(Node, "set the starting node to allocate memory on, default -1"); + +/* LLIPort (Low Latency Interrupt TCP Port) + * + * Valid Range: 0 - 65535 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off"); + +#define DEFAULT_LLIPORT 0 +#define MAX_LLIPORT 0xFFFF +#define MIN_LLIPORT 0 + +/* LLIPush (Low Latency Interrupt on TCP Push flag) + * + * Valid Range: 0, 1 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off"); + +#define DEFAULT_LLIPUSH 0 +#define MAX_LLIPUSH 1 +#define MIN_LLIPUSH 0 + +/* LLISize (Low Latency Interrupt on Packet Size) + * + * Valid Range: 0 - 1500 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off"); + +#define DEFAULT_LLISIZE 0 +#define MAX_LLISIZE 1500 +#define MIN_LLISIZE 0 + +/* RSS (Enable RSS multiqueue receive) + * + * Valid Range: 0 - 8 + * + * Default Value: 1 + */ +IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus"); + +#define DEFAULT_RSS 1 +#define MAX_RSS 8 +#define MIN_RSS 0 + +/* VMDQ (Enable VMDq multiqueue receive) + * + * Valid Range: 0 - 8 + * + * Default Value: 0 + */ +IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0"); + +#define DEFAULT_VMDQ 0 +#define MAX_VMDQ MAX_RSS +#define MIN_VMDQ 0 + +/* max_vfs (Enable SR-IOV VF devices) + * + * Valid Range: 0 - 7 + * + * Default Value: 0 + */ +IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0"); + +#define DEFAULT_SRIOV 0 +#define MAX_SRIOV 7 +#define MIN_SRIOV 0 + +/* MDD (Enable Malicious Driver Detection) + * + * Only available when SR-IOV is enabled - max_vfs is greater than 0 + * + * Valid Range: 0, 1 + * + * Default Value: 1 + */ +IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. " + "Only available when max_vfs is greater than 0"); + +#ifdef DEBUG + +/* Disable Hardware Reset on Tx Hang + * + * Valid Range: 0, 1 + * + * Default Value: 0 (disabled, i.e. h/w will reset) + */ +IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang"); + +/* Dump Transmit and Receive buffers + * + * Valid Range: 0, 1 + * + * Default Value: 0 + */ +IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request"); + +#endif /* DEBUG */ + +/* QueuePairs (Enable TX/RX queue pairs for interrupt handling) + * + * Valid Range: 0 - 1 + * + * Default Value: 1 + */ +IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on"); + +#define DEFAULT_QUEUE_PAIRS 1 +#define MAX_QUEUE_PAIRS 1 +#define MIN_QUEUE_PAIRS 0 + +/* Enable/disable EEE (a.k.a. IEEE802.3az) + * + * Valid Range: 0, 1 + * + * Default Value: 1 + */ + IGB_PARAM(EEE, "Enable/disable on parts that support the feature"); + +/* Enable/disable DMA Coalescing + * + * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, + * 9000, 10000(msec), 250(usec), 500(usec) + * + * Default Value: 0 + */ + IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing ((0=off, 1000-10000(msec), 250, 500 (usec))"); + +#ifndef IGB_NO_LRO +/* Enable/disable Large Receive Offload + * + * Valid Values: 0(off), 1(on) + * + * Default Value: 0 + */ + IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off"); + +#endif +struct igb_opt_list { + int i; + char *str; +}; +struct igb_option { + enum { enable_option, range_option, list_option } type; + const char *name; + const char *err; + int def; + union { + struct { /* range_option info */ + int min; + int max; + } r; + struct { /* list_option info */ + int nr; + struct igb_opt_list *p; + } l; + } arg; +}; + +static int igb_validate_option(unsigned int *value, + struct igb_option *opt, + struct igb_adapter *adapter) +{ + if (*value == OPTION_UNSET) { + *value = opt->def; + return 0; + } + + switch (opt->type) { + case enable_option: + switch (*value) { + case OPTION_ENABLED: + DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); + return 0; + case OPTION_DISABLED: + DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); + return 0; + } + break; + case range_option: + if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { + DPRINTK(PROBE, INFO, + "%s set to %d\n", opt->name, *value); + return 0; + } + break; + case list_option: { + int i; + struct igb_opt_list *ent; + + for (i = 0; i < opt->arg.l.nr; i++) { + ent = &opt->arg.l.p[i]; + if (*value == ent->i) { + if (ent->str[0] != '\0') + DPRINTK(PROBE, INFO, "%s\n", ent->str); + return 0; + } + } + } + break; + default: + BUG(); + } + + DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n", + opt->name, *value, opt->err); + *value = opt->def; + return -1; +} + +/** + * igb_check_options - Range Checking for Command Line Parameters + * @adapter: board private structure + * + * This routine checks all command line parameters for valid user + * input. If an invalid value is given, or if no user specified + * value exists, a default value is used. The final value is stored + * in a variable in the adapter structure. + **/ + +void igb_check_options(struct igb_adapter *adapter) +{ + int bd = adapter->bd_number; + struct e1000_hw *hw = &adapter->hw; + + if (bd >= IGB_MAX_NIC) { + DPRINTK(PROBE, NOTICE, + "Warning: no configuration for board #%d\n", bd); + DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); +#ifndef module_param_array + bd = IGB_MAX_NIC; +#endif + } + + { /* Interrupt Throttling Rate */ + struct igb_option opt = { + .type = range_option, + .name = "Interrupt Throttling Rate (ints/sec)", + .err = "using default of " __MODULE_STRING(DEFAULT_ITR), + .def = DEFAULT_ITR, + .arg = { .r = { .min = MIN_ITR, + .max = MAX_ITR } } + }; + +#ifdef module_param_array + if (num_InterruptThrottleRate > bd) { +#endif + unsigned int itr = InterruptThrottleRate[bd]; + + switch (itr) { + case 0: + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + if (hw->mac.type >= e1000_i350) + adapter->dmac = IGB_DMAC_DISABLE; + adapter->rx_itr_setting = itr; + break; + case 1: + DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", + opt.name); + adapter->rx_itr_setting = itr; + break; + case 3: + DPRINTK(PROBE, INFO, + "%s set to dynamic conservative mode\n", + opt.name); + adapter->rx_itr_setting = itr; + break; + default: + igb_validate_option(&itr, &opt, adapter); + /* Save the setting, because the dynamic bits + * change itr. In case of invalid user value, + * default to conservative mode, else need to + * clear the lower two bits because they are + * used as control */ + if (itr == 3) { + adapter->rx_itr_setting = itr; + } else { + adapter->rx_itr_setting = 1000000000 / + (itr * 256); + adapter->rx_itr_setting &= ~3; + } + break; + } +#ifdef module_param_array + } else { + adapter->rx_itr_setting = opt.def; + } +#endif + adapter->tx_itr_setting = adapter->rx_itr_setting; + } + { /* Interrupt Mode */ + struct igb_option opt = { + .type = range_option, + .name = "Interrupt Mode", + .err = "defaulting to 2 (MSI-X)", + .def = IGB_INT_MODE_MSIX, + .arg = { .r = { .min = MIN_INTMODE, + .max = MAX_INTMODE } } + }; + +#ifdef module_param_array + if (num_IntMode > bd) { +#endif + unsigned int int_mode = IntMode[bd]; + igb_validate_option(&int_mode, &opt, adapter); + adapter->int_mode = int_mode; +#ifdef module_param_array + } else { + adapter->int_mode = opt.def; + } +#endif + } + { /* Low Latency Interrupt TCP Port */ + struct igb_option opt = { + .type = range_option, + .name = "Low Latency Interrupt TCP Port", + .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT), + .def = DEFAULT_LLIPORT, + .arg = { .r = { .min = MIN_LLIPORT, + .max = MAX_LLIPORT } } + }; + +#ifdef module_param_array + if (num_LLIPort > bd) { +#endif + adapter->lli_port = LLIPort[bd]; + if (adapter->lli_port) { + igb_validate_option(&adapter->lli_port, &opt, + adapter); + } else { + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + } +#ifdef module_param_array + } else { + adapter->lli_port = opt.def; + } +#endif + } + { /* Low Latency Interrupt on Packet Size */ + struct igb_option opt = { + .type = range_option, + .name = "Low Latency Interrupt on Packet Size", + .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE), + .def = DEFAULT_LLISIZE, + .arg = { .r = { .min = MIN_LLISIZE, + .max = MAX_LLISIZE } } + }; + +#ifdef module_param_array + if (num_LLISize > bd) { +#endif + adapter->lli_size = LLISize[bd]; + if (adapter->lli_size) { + igb_validate_option(&adapter->lli_size, &opt, + adapter); + } else { + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + } +#ifdef module_param_array + } else { + adapter->lli_size = opt.def; + } +#endif + } + { /* Low Latency Interrupt on TCP Push flag */ + struct igb_option opt = { + .type = enable_option, + .name = "Low Latency Interrupt on TCP Push flag", + .err = "defaulting to Disabled", + .def = OPTION_DISABLED + }; + +#ifdef module_param_array + if (num_LLIPush > bd) { +#endif + unsigned int lli_push = LLIPush[bd]; + igb_validate_option(&lli_push, &opt, adapter); + adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0; +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0; + } +#endif + } + { /* SRIOV - Enable SR-IOV VF devices */ + struct igb_option opt = { + .type = range_option, + .name = "max_vfs - SR-IOV VF devices", + .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV), + .def = DEFAULT_SRIOV, + .arg = { .r = { .min = MIN_SRIOV, + .max = MAX_SRIOV } } + }; + +#ifdef module_param_array + if (num_max_vfs > bd) { +#endif + adapter->vfs_allocated_count = max_vfs[bd]; + igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter); + +#ifdef module_param_array + } else { + adapter->vfs_allocated_count = opt.def; + } +#endif + if (adapter->vfs_allocated_count) { + switch (hw->mac.type) { + case e1000_82575: + case e1000_82580: + case e1000_i210: + case e1000_i211: + case e1000_i354: + adapter->vfs_allocated_count = 0; + DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n"); + default: + break; + } + } + } + { /* VMDQ - Enable VMDq multiqueue receive */ + struct igb_option opt = { + .type = range_option, + .name = "VMDQ - VMDq multiqueue queue count", + .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ), + .def = DEFAULT_VMDQ, + .arg = { .r = { .min = MIN_VMDQ, + .max = (MAX_VMDQ - adapter->vfs_allocated_count) } } + }; + if ((hw->mac.type != e1000_i210) || + (hw->mac.type != e1000_i211)) { +#ifdef module_param_array + if (num_VMDQ > bd) { +#endif + adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]); + if (adapter->vfs_allocated_count && !adapter->vmdq_pools) { + DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n"); + adapter->vmdq_pools = 1; + } + igb_validate_option(&adapter->vmdq_pools, &opt, adapter); + +#ifdef module_param_array + } else { + if (!adapter->vfs_allocated_count) + adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def); + else + adapter->vmdq_pools = 1; + } +#endif +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) { + DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n"); + adapter->vmdq_pools = 0; + } +#endif + + } else { + DPRINTK(PROBE, INFO, "VMDq option is not supported.\n"); + adapter->vmdq_pools = opt.def; + } + } + { /* RSS - Enable RSS multiqueue receives */ + struct igb_option opt = { + .type = range_option, + .name = "RSS - RSS multiqueue receive count", + .err = "using default of " __MODULE_STRING(DEFAULT_RSS), + .def = DEFAULT_RSS, + .arg = { .r = { .min = MIN_RSS, + .max = MAX_RSS } } + }; + + switch (hw->mac.type) { + case e1000_82575: +#ifndef CONFIG_IGB_VMDQ_NETDEV + if (!!adapter->vmdq_pools) { + if (adapter->vmdq_pools <= 2) { + if (adapter->vmdq_pools == 2) + opt.arg.r.max = 3; + } else { + opt.arg.r.max = 1; + } + } else { + opt.arg.r.max = 4; + } +#else + opt.arg.r.max = !!adapter->vmdq_pools ? 1 : 4; +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + break; + case e1000_i210: + opt.arg.r.max = 4; + break; + case e1000_i211: + opt.arg.r.max = 2; + break; + case e1000_82576: +#ifndef CONFIG_IGB_VMDQ_NETDEV + if (!!adapter->vmdq_pools) + opt.arg.r.max = 2; + break; +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + case e1000_82580: + case e1000_i350: + case e1000_i354: + default: + if (!!adapter->vmdq_pools) + opt.arg.r.max = 1; + break; + } + + if (adapter->int_mode != IGB_INT_MODE_MSIX) { + DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n", + opt.err); + opt.arg.r.max = 1; + } + +#ifdef module_param_array + if (num_RSS > bd) { +#endif + adapter->rss_queues = RSS[bd]; + switch (adapter->rss_queues) { + case 1: + break; + default: + igb_validate_option(&adapter->rss_queues, &opt, adapter); + if (adapter->rss_queues) + break; + case 0: + adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus()); + break; + } +#ifdef module_param_array + } else { + adapter->rss_queues = opt.def; + } +#endif + } + { /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */ + struct igb_option opt = { + .type = enable_option, + .name = "QueuePairs - Tx/Rx queue pairs for interrupt handling", + .err = "defaulting to Enabled", + .def = OPTION_ENABLED + }; +#ifdef module_param_array + if (num_QueuePairs > bd) { +#endif + unsigned int qp = QueuePairs[bd]; + /* + * We must enable queue pairs if the number of queues + * exceeds the number of available interrupts. We are + * limited to 10, or 3 per unallocated vf. On I210 and + * I211 devices, we are limited to 5 interrupts. + * However, since I211 only supports 2 queues, we do not + * need to check and override the user option. + */ + if (qp == OPTION_DISABLED) { + if (adapter->rss_queues > 4) + qp = OPTION_ENABLED; + + if (adapter->vmdq_pools > 4) + qp = OPTION_ENABLED; + + if (adapter->rss_queues > 1 && + (adapter->vmdq_pools > 3 || + adapter->vfs_allocated_count > 6)) + qp = OPTION_ENABLED; + + if (hw->mac.type == e1000_i210 && + adapter->rss_queues > 2) + qp = OPTION_ENABLED; + + if (qp == OPTION_ENABLED) + DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n", + opt.err); + } + igb_validate_option(&qp, &opt, adapter); + adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0; +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0; + } +#endif + } + { /* EEE - Enable EEE for capable adapters */ + + if (hw->mac.type >= e1000_i350) { + struct igb_option opt = { + .type = enable_option, + .name = "EEE Support", + .err = "defaulting to Enabled", + .def = OPTION_ENABLED + }; +#ifdef module_param_array + if (num_EEE > bd) { +#endif + unsigned int eee = EEE[bd]; + igb_validate_option(&eee, &opt, adapter); + adapter->flags |= eee ? IGB_FLAG_EEE : 0; + if (eee) + hw->dev_spec._82575.eee_disable = false; + else + hw->dev_spec._82575.eee_disable = true; + +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_EEE : 0; + if (adapter->flags & IGB_FLAG_EEE) + hw->dev_spec._82575.eee_disable = false; + else + hw->dev_spec._82575.eee_disable = true; + } +#endif + } + } + { /* DMAC - Enable DMA Coalescing for capable adapters */ + + if (hw->mac.type >= e1000_i350) { + struct igb_opt_list list [] = { + { IGB_DMAC_DISABLE, "DMAC Disable"}, + { IGB_DMAC_MIN, "DMAC 250 usec"}, + { IGB_DMAC_500, "DMAC 500 usec"}, + { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"}, + { IGB_DMAC_2000, "DMAC 2000 usec"}, + { IGB_DMAC_3000, "DMAC 3000 usec"}, + { IGB_DMAC_4000, "DMAC 4000 usec"}, + { IGB_DMAC_5000, "DMAC 5000 usec"}, + { IGB_DMAC_6000, "DMAC 6000 usec"}, + { IGB_DMAC_7000, "DMAC 7000 usec"}, + { IGB_DMAC_8000, "DMAC 8000 usec"}, + { IGB_DMAC_9000, "DMAC 9000 usec"}, + { IGB_DMAC_MAX, "DMAC 10000 usec"} + }; + struct igb_option opt = { + .type = list_option, + .name = "DMA Coalescing", + .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE), + .def = IGB_DMAC_DISABLE, + .arg = { .l = { .nr = 13, + .p = list + } + } + }; +#ifdef module_param_array + if (num_DMAC > bd) { +#endif + unsigned int dmac = DMAC[bd]; + if (adapter->rx_itr_setting == IGB_DMAC_DISABLE) + dmac = IGB_DMAC_DISABLE; + igb_validate_option(&dmac, &opt, adapter); + switch (dmac) { + case IGB_DMAC_DISABLE: + adapter->dmac = dmac; + break; + case IGB_DMAC_MIN: + adapter->dmac = dmac; + break; + case IGB_DMAC_500: + adapter->dmac = dmac; + break; + case IGB_DMAC_EN_DEFAULT: + adapter->dmac = dmac; + break; + case IGB_DMAC_2000: + adapter->dmac = dmac; + break; + case IGB_DMAC_3000: + adapter->dmac = dmac; + break; + case IGB_DMAC_4000: + adapter->dmac = dmac; + break; + case IGB_DMAC_5000: + adapter->dmac = dmac; + break; + case IGB_DMAC_6000: + adapter->dmac = dmac; + break; + case IGB_DMAC_7000: + adapter->dmac = dmac; + break; + case IGB_DMAC_8000: + adapter->dmac = dmac; + break; + case IGB_DMAC_9000: + adapter->dmac = dmac; + break; + case IGB_DMAC_MAX: + adapter->dmac = dmac; + break; + default: + adapter->dmac = opt.def; + DPRINTK(PROBE, INFO, + "Invalid DMAC setting, " + "resetting DMAC to %d\n", opt.def); + } +#ifdef module_param_array + } else + adapter->dmac = opt.def; +#endif + } + } +#ifndef IGB_NO_LRO + { /* LRO - Enable Large Receive Offload */ + struct igb_option opt = { + .type = enable_option, + .name = "LRO - Large Receive Offload", + .err = "defaulting to Disabled", + .def = OPTION_DISABLED + }; + struct net_device *netdev = adapter->netdev; +#ifdef module_param_array + if (num_LRO > bd) { +#endif + unsigned int lro = LRO[bd]; + igb_validate_option(&lro, &opt, adapter); + netdev->features |= lro ? NETIF_F_LRO : 0; +#ifdef module_param_array + } else if (opt.def == OPTION_ENABLED) { + netdev->features |= NETIF_F_LRO; + } +#endif + } +#endif /* IGB_NO_LRO */ + { /* MDD - Enable Malicious Driver Detection. Only available when + SR-IOV is enabled. */ + struct igb_option opt = { + .type = enable_option, + .name = "Malicious Driver Detection", + .err = "defaulting to 1", + .def = OPTION_ENABLED, + .arg = { .r = { .min = OPTION_DISABLED, + .max = OPTION_ENABLED } } + }; + +#ifdef module_param_array + if (num_MDD > bd) { +#endif + adapter->mdd = MDD[bd]; + igb_validate_option((uint *)&adapter->mdd, &opt, + adapter); +#ifdef module_param_array + } else { + adapter->mdd = opt.def; + } +#endif + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c new file mode 100644 index 00000000..66236d29 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c @@ -0,0 +1,363 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "e1000_82575.h" +#include "e1000_hw.h" + +#ifdef IGB_PROCFS +#ifndef IGB_HWMON + +#include +#include +#include +#include +#include + +static struct proc_dir_entry *igb_top_dir = NULL; + + +bool igb_thermal_present(struct igb_adapter *adapter) +{ + s32 status; + struct e1000_hw *hw; + + if (adapter == NULL) + return false; + hw = &adapter->hw; + + /* + * Only set I2C bit-bang mode if an external thermal sensor is + * supported on this device. + */ + if (adapter->ets) { + status = e1000_set_i2c_bb(hw); + if (status != E1000_SUCCESS) + return false; + } + + status = hw->mac.ops.init_thermal_sensor_thresh(hw); + if (status != E1000_SUCCESS) + return false; + + return true; +} + + +static int igb_macburn(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct e1000_hw *hw; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", + (unsigned int)hw->mac.perm_addr[0], + (unsigned int)hw->mac.perm_addr[1], + (unsigned int)hw->mac.perm_addr[2], + (unsigned int)hw->mac.perm_addr[3], + (unsigned int)hw->mac.perm_addr[4], + (unsigned int)hw->mac.perm_addr[5]); +} + +static int igb_macadmn(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct e1000_hw *hw; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", + (unsigned int)hw->mac.addr[0], + (unsigned int)hw->mac.addr[1], + (unsigned int)hw->mac.addr[2], + (unsigned int)hw->mac.addr[3], + (unsigned int)hw->mac.addr[4], + (unsigned int)hw->mac.addr[5]); +} + +static int igb_numeports(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct e1000_hw *hw; + int ports; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + ports = 4; + + return snprintf(page, count, "%d\n", ports); +} + +static int igb_porttype(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + return snprintf(page, count, "%d\n", + test_bit(__IGB_DOWN, &adapter->state)); +} + +static int igb_therm_location(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", therm_data->sensor_data->location); +} + +static int igb_therm_maxopthresh(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", + therm_data->sensor_data->max_op_thresh); +} + +static int igb_therm_cautionthresh(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", + therm_data->sensor_data->caution_thresh); +} + +static int igb_therm_temp(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + s32 status; + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + status = e1000_get_thermal_sensor_data(therm_data->hw); + if (status != E1000_SUCCESS) + snprintf(page, count, "error: status %d returned\n", status); + + return snprintf(page, count, "%d\n", therm_data->sensor_data->temp); +} + +struct igb_proc_type{ + char name[32]; + int (*read)(char*, char**, off_t, int, int*, void*); +}; + +struct igb_proc_type igb_proc_entries[] = { + {"numeports", &igb_numeports}, + {"porttype", &igb_porttype}, + {"macburn", &igb_macburn}, + {"macadmn", &igb_macadmn}, + {"", NULL} +}; + +struct igb_proc_type igb_internal_entries[] = { + {"location", &igb_therm_location}, + {"temp", &igb_therm_temp}, + {"cautionthresh", &igb_therm_cautionthresh}, + {"maxopthresh", &igb_therm_maxopthresh}, + {"", NULL} +}; + +void igb_del_proc_entries(struct igb_adapter *adapter) +{ + int index, i; + char buf[16]; /* much larger than the sensor number will ever be */ + + if (igb_top_dir == NULL) + return; + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + if (adapter->therm_dir[i] == NULL) + continue; + + for (index = 0; ; index++) { + if (igb_internal_entries[index].read == NULL) + break; + + remove_proc_entry(igb_internal_entries[index].name, + adapter->therm_dir[i]); + } + snprintf(buf, sizeof(buf), "sensor_%d", i); + remove_proc_entry(buf, adapter->info_dir); + } + + if (adapter->info_dir != NULL) { + for (index = 0; ; index++) { + if (igb_proc_entries[index].read == NULL) + break; + remove_proc_entry(igb_proc_entries[index].name, + adapter->info_dir); + } + remove_proc_entry("info", adapter->eth_dir); + } + + if (adapter->eth_dir != NULL) + remove_proc_entry(pci_name(adapter->pdev), igb_top_dir); +} + +/* called from igb_main.c */ +void igb_procfs_exit(struct igb_adapter *adapter) +{ + igb_del_proc_entries(adapter); +} + +int igb_procfs_topdir_init(void) +{ + igb_top_dir = proc_mkdir("driver/igb", NULL); + if (igb_top_dir == NULL) + return -ENOMEM; + + return 0; +} + +void igb_procfs_topdir_exit(void) +{ + remove_proc_entry("driver/igb", NULL); +} + +/* called from igb_main.c */ +int igb_procfs_init(struct igb_adapter *adapter) +{ + int rc = 0; + int i; + int index; + char buf[16]; /* much larger than the sensor number will ever be */ + + adapter->eth_dir = NULL; + adapter->info_dir = NULL; + for (i = 0; i < E1000_MAX_SENSORS; i++) + adapter->therm_dir[i] = NULL; + + if ( igb_top_dir == NULL ) { + rc = -ENOMEM; + goto fail; + } + + adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir); + if (adapter->eth_dir == NULL) { + rc = -ENOMEM; + goto fail; + } + + adapter->info_dir = proc_mkdir("info", adapter->eth_dir); + if (adapter->info_dir == NULL) { + rc = -ENOMEM; + goto fail; + } + for (index = 0; ; index++) { + if (igb_proc_entries[index].read == NULL) { + break; + } + if (!(create_proc_read_entry(igb_proc_entries[index].name, + 0444, + adapter->info_dir, + igb_proc_entries[index].read, + adapter))) { + + rc = -ENOMEM; + goto fail; + } + } + if (igb_thermal_present(adapter) == false) + goto exit; + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + + if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0) + continue; + + snprintf(buf, sizeof(buf), "sensor_%d", i); + adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir); + if (adapter->therm_dir[i] == NULL) { + rc = -ENOMEM; + goto fail; + } + for (index = 0; ; index++) { + if (igb_internal_entries[index].read == NULL) + break; + /* + * therm_data struct contains pointer the read func + * will be needing + */ + adapter->therm_data[i].hw = &adapter->hw; + adapter->therm_data[i].sensor_data = + &adapter->hw.mac.thermal_sensor_data.sensor[i]; + + if (!(create_proc_read_entry( + igb_internal_entries[index].name, + 0444, + adapter->therm_dir[i], + igb_internal_entries[index].read, + &adapter->therm_data[i]))) { + rc = -ENOMEM; + goto fail; + } + } + } + goto exit; + +fail: + igb_del_proc_entries(adapter); +exit: + return rc; +} + +#endif /* !IGB_HWMON */ +#endif /* IGB_PROCFS */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c new file mode 100644 index 00000000..454b70ce --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c @@ -0,0 +1,944 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/****************************************************************************** + Copyright(c) 2011 Richard Cochran for some of the + 82576 and 82580 code +******************************************************************************/ + +#include "igb.h" + +#include +#include +#include +#include + +#define INCVALUE_MASK 0x7fffffff +#define ISGN 0x80000000 + +/* + * The 82580 timesync updates the system timer every 8ns by 8ns, + * and this update value cannot be reprogrammed. + * + * Neither the 82576 nor the 82580 offer registers wide enough to hold + * nanoseconds time values for very long. For the 82580, SYSTIM always + * counts nanoseconds, but the upper 24 bits are not available. The + * frequency is adjusted by changing the 32 bit fractional nanoseconds + * register, TIMINCA. + * + * For the 82576, the SYSTIM register time unit is affect by the + * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this + * field are needed to provide the nominal 16 nanosecond period, + * leaving 19 bits for fractional nanoseconds. + * + * We scale the NIC clock cycle by a large factor so that relatively + * small clock corrections can be added or subtracted at each clock + * tick. The drawbacks of a large factor are a) that the clock + * register overflows more quickly (not such a big deal) and b) that + * the increment per tick has to fit into 24 bits. As a result we + * need to use a shift of 19 so we can fit a value of 16 into the + * TIMINCA register. + * + * + * SYSTIMH SYSTIML + * +--------------+ +---+---+------+ + * 82576 | 32 | | 8 | 5 | 19 | + * +--------------+ +---+---+------+ + * \________ 45 bits _______/ fract + * + * +----------+---+ +--------------+ + * 82580 | 24 | 8 | | 32 | + * +----------+---+ +--------------+ + * reserved \______ 40 bits _____/ + * + * + * The 45 bit 82576 SYSTIM overflows every + * 2^45 * 10^-9 / 3600 = 9.77 hours. + * + * The 40 bit 82580 SYSTIM overflows every + * 2^40 * 10^-9 / 60 = 18.3 minutes. + */ + +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGB_PTP_TX_TIMEOUT (HZ * 15) +#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) +#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) +#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) +#define IGB_NBITS_82580 40 + +/* + * SYSTIM read access for the 82576 + */ + +static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) +{ + struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); + struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; + + lo = E1000_READ_REG(hw, E1000_SYSTIML); + hi = E1000_READ_REG(hw, E1000_SYSTIMH); + + val = ((u64) hi) << 32; + val |= lo; + + return val; +} + +/* + * SYSTIM read access for the 82580 + */ + +static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) +{ + struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); + struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; + + /* The timestamp latches on lowest register read. For the 82580 + * the lowest register is SYSTIMR instead of SYSTIML. However we only + * need to provide nanosecond resolution, so we just ignore it. + */ + E1000_READ_REG(hw, E1000_SYSTIMR); + lo = E1000_READ_REG(hw, E1000_SYSTIML); + hi = E1000_READ_REG(hw, E1000_SYSTIMH); + + val = ((u64) hi) << 32; + val |= lo; + + return val; +} + +/* + * SYSTIM read access for I210/I211 + */ + +static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + u32 sec, nsec; + + /* The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + E1000_READ_REG(hw, E1000_SYSTIMR); + nsec = E1000_READ_REG(hw, E1000_SYSTIML); + sec = E1000_READ_REG(hw, E1000_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igb_ptp_write_i210(struct igb_adapter *adapter, + const struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * Writing the SYSTIMR register is not necessary as it only provides + * sub-nanosecond resolution. + */ + E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec); + E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec); +} + +/** + * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value. + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + * + * The 'tmreg_lock' spinlock is used to protect the consistency of the + * system time value. This is needed because reading the 64 bit time + * value involves reading two (or three) 32 bit registers. The first + * read latches the value. Ditto for writing. + * + * In addition, here have extended the system time with an overflow + * counter in software. + **/ +static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + unsigned long flags; + u64 ns; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + ns = timecounter_cyc2time(&adapter->tc, systim); + + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); + break; + case e1000_i210: + case e1000_i211: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/* + * PTP clock operations + */ + +static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; + u64 rate; + u32 incvalue; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate = ppb; + rate <<= 14; + rate = div_u64(rate, 1953125); + + incvalue = 16 << IGB_82576_TSYNC_SHIFT; + + if (neg_adj) + incvalue -= rate; + else + incvalue += rate; + + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); + + return 0; +} + +static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; + u64 rate; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate = ppb; + rate <<= 26; + rate = div_u64(rate, 1953125); + + /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x + * as quickly. Account for this by dividing the adjustment by 2.5. + */ + if (hw->mac.type == e1000_i354) { + u32 status = E1000_READ_REG(hw, E1000_STATUS); + + if ((status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) { + rate <<= 1; + rate = div_u64(rate, 5); + } + } + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + E1000_WRITE_REG(hw, E1000_TIMINCA, inca); + + return 0; +} + +static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + s64 now; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + now = timecounter_read(&igb->tc); + now += delta; + timecounter_init(&igb->tc, &igb->cc, now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + struct timespec now, then = ns_to_timespec(delta); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, &now); + now = timespec_add(now, then); + igb_ptp_write_i210(igb, (const struct timespec *)&now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; + u32 remainder; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + ns = timecounter_read(&igb->tc); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); + ts->tv_nsec = remainder; + + return 0; +} + +static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; + + ns = ts->tv_sec * 1000000000ULL; + ns += ts->tv_nsec; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + timecounter_init(&igb->tc, &igb->cc, ns); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_write_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/** + * igb_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igb_ptp_tx_work(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, struct igb_adapter, + ptp_tx_work); + struct e1000_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + if (time_is_before_jiffies(adapter->ptp_tx_start + + IGB_PTP_TX_TIMEOUT)) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + adapter->tx_hwtstamp_timeouts++; + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang"); + return; + } + + tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL); + if (tsynctxctl & E1000_TSYNCTXCTL_VALID) + igb_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +static void igb_ptp_overflow_check(struct work_struct *work) +{ + struct igb_adapter *igb = + container_of(work, struct igb_adapter, ptp_overflow_work.work); + struct timespec ts; + + igb->ptp_caps.gettime(&igb->ptp_caps, &ts); + + pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); + + schedule_delayed_work(&igb->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); +} + +/** + * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched + * @adapter: private network adapter structure + * + * This watchdog task is scheduled to detect error case where hardware has + * dropped an Rx packet that was timestamped when the ring is full. The + * particular error is rare but leaves the device in a state unable to timestamp + * any future packets. + */ +void igb_ptp_rx_hang(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_ring *rx_ring; + u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL); + unsigned long rx_event; + int n; + + if (hw->mac.type != e1000_82576) + return; + + /* If we don't have a valid timestamp in the registers, just update the + * timeout counter and exit + */ + if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) { + adapter->last_rx_ptp_check = jiffies; + return; + } + + /* Determine the most recent watchdog or rx_timestamp event */ + rx_event = adapter->last_rx_ptp_check; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } + + /* Only need to read the high RXSTMP register to clear the lock */ + if (time_is_before_jiffies(rx_event + 5 * HZ)) { + E1000_READ_REG(hw, E1000_RXSTMPH); + adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; + dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang"); + } +} + +/** + * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure. + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps shhwtstamps; + u64 regval; + + regval = E1000_READ_REG(hw, E1000_TXSTMPL); + regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +/** + * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * @q_vector: Pointer to interrupt specific structure + * @va: Pointer to address containing Rx buffer + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the first buffer of an + * incoming frame. The value is stored in little endian format starting on + * byte 8. + */ +void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb) +{ + __le64 *regval = (__le64 *)va; + + /* + * The timestamp is recorded in little endian format. + * DWORD: 0 1 2 3 + * Field: Reserved Reserved SYSTIML SYSTIMH + */ + igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval[1])); +} + +/** + * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register + * @q_vector: Pointer to interrupt specific structure + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the internal registers + * of the adapter and store it in the skb. + */ +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + u64 regval; + + /* + * If this bit is set, then the RX registers contain the time stamp. No + * other packet will be time stamped until we read these registers, so + * read the registers to make them available again. Because only one + * packet can be time stamped at a time, we know that the register + * values must belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared tx_flags that we + * can turn into a skb_shared_hwtstamps. + */ + if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; + + regval = E1000_READ_REG(hw, E1000_RXSTMPL); + regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); +} + +/** + * igb_ptp_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + **/ +int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * 82576 cannot timestamp all packets, which it needs to do to + * support both V1 Sync and Delay_Req messages + */ + if (hw->mac.type != e1000_82576) { + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + } + /* fall through */ + default: + config.rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + if (hw->mac.type == e1000_82575) { + if (tsync_rx_ctl | tsync_tx_ctl) + return -EINVAL; + return 0; + } + + /* + * Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as + * long as one rx filter was configured. + */ + if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { + tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + is_l2 = true; + is_l4 = true; + + if ((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) { + regval = E1000_READ_REG(hw, E1000_RXPBS); + regval |= E1000_RXPBS_CFG_TS_EN; + E1000_WRITE_REG(hw, E1000_RXPBS, regval); + } + } + + /* enable/disable TX */ + regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL); + regval &= ~E1000_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL); + regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + E1000_WRITE_REG(hw, E1000_ETQF(3), + (E1000_ETQF_FILTER_ENABLE | /* enable filter */ + E1000_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + E1000_WRITE_REG(hw, E1000_ETQF(3), 0); + + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | E1000_FTQF_VF_BP /* VF not compared */ + | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ + | E1000_FTQF_MASK); /* mask all inputs */ + ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT)); + E1000_WRITE_REG(hw, E1000_IMIREXT(3), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + if (hw->mac.type == e1000_82576) { + /* enable source port check */ + E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT)); + ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; + } + E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf); + } else { + E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK); + } + E1000_WRITE_FLUSH(hw); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = E1000_READ_REG(hw, E1000_TXSTMPL); + regval = E1000_READ_REG(hw, E1000_TXSTMPH); + regval = E1000_READ_REG(hw, E1000_RXSTMPL); + regval = E1000_READ_REG(hw, E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +void igb_ptp_init(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + + switch (hw->mac.type) { + case e1000_82576: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 999999881; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82576; + adapter->cc.mask = CLOCKSOURCE_MASK(64); + adapter->cc.mult = 1; + adapter->cc.shift = IGB_82576_TSYNC_SHIFT; + /* Dial the nominal frequency. */ + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | + INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82580; + adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); + adapter->cc.mult = 1; + adapter->cc.shift = 0; + /* Enable the timer functions by clearing bit 31. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + break; + case e1000_i210: + case e1000_i211: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; + adapter->ptp_caps.gettime = igb_ptp_gettime_i210; + adapter->ptp_caps.settime = igb_ptp_settime_i210; + adapter->ptp_caps.enable = igb_ptp_enable; + /* Enable the timer functions by clearing bit 31. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + break; + default: + adapter->ptp_clock = NULL; + return; + } + + E1000_WRITE_FLUSH(hw); + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + /* Initialize the clock and overflow work for devices that need it. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); + + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); + + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); + } + + /* Initialize the time sync interrupts for devices that support it. */ + if (hw->mac.type >= e1000_82580) { + E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); + E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); + } + + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; + dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); + } else { + dev_info(&adapter->pdev->dev, "added PHC on %s\n", + adapter->netdev->name); + adapter->flags |= IGB_FLAG_PTP; + } +} + +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) +{ + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + cancel_delayed_work_sync(&adapter->ptp_overflow_work); + break; + case e1000_i210: + case e1000_i211: + /* No delayed work to cancel. */ + break; + default: + return; + } + + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + + if (adapter->ptp_clock) { + ptp_clock_unregister(adapter->ptp_clock); + dev_info(&adapter->pdev->dev, "removed PHC on %s\n", + adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_PTP; + } +} + +/** + * igb_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. + * + * This function handles the reset work required to re-enable the PTP device. + **/ +void igb_ptp_reset(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + if (!(adapter->flags & IGB_FLAG_PTP)) + return; + + switch (adapter->hw.mac.type) { + case e1000_82576: + /* Dial the nominal frequency. */ + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | + INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Enable the timer functions and interrupts. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); + E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); + break; + default: + /* No work to do. */ + return; + } + + /* Re-initialize the timer. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); + + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h new file mode 100644 index 00000000..18da64a3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h @@ -0,0 +1,249 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool register test data */ +struct igb_reg_test { + u16 reg; + u16 reg_offset; + u16 array_len; + u16 test_type; + u32 mask; + u32 write; +}; + +/* In the hardware, registers are laid out either singly, in arrays + * spaced 0x100 bytes apart, or in contiguous tables. We assume + * most tests take place on arrays or single registers (handled + * as a single-element array) and special-case the tables. + * Table tests are always pattern tests. + * + * We also make provision for some required setup steps by specifying + * registers to be written without any read-back testing. + */ + +#define PATTERN_TEST 1 +#define SET_READ_TEST 2 +#define WRITE_NO_TEST 3 +#define TABLE32_TEST 4 +#define TABLE64_TEST_LO 5 +#define TABLE64_TEST_HI 6 + +/* i210 reg test */ +static struct igb_reg_test reg_test_i210[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* RDH is read-only for i210, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x900FFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* i350 reg test */ +static struct igb_reg_test reg_test_i350[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + /* VET is readonly on i350 */ + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* RDH is read-only for i350, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0xC3FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 16, TABLE64_TEST_HI, + 0xC3FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82580 reg test */ +static struct igb_reg_test reg_test_82580[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + /* RDH is read-only for 82580, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82576 reg test */ +static struct igb_reg_test reg_test_82576[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + /* Enable all queues before testing. */ + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + /* RDH is read-only for 82576, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, + { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, 0 }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82575 register test */ +static struct igb_reg_test reg_test_82575[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* Enable all four RX queues before testing. */ + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + /* RDH is read-only for 82575, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_TXCW, 0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x800FFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c new file mode 100644 index 00000000..015c8952 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c @@ -0,0 +1,436 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#include + +#include "igb.h" +#include "igb_vmdq.h" +#include + +#ifdef CONFIG_IGB_VMDQ_NETDEV +int igb_vmdq_open(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct net_device *main_netdev = adapter->netdev; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + if (test_bit(__IGB_DOWN, &adapter->state)) { + DPRINTK(DRV, WARNING, + "Open %s before opening this device.\n", + main_netdev->name); + return -EAGAIN; + } + netif_carrier_off(dev); + vadapter->tx_ring->vmdq_netdev = dev; + vadapter->rx_ring->vmdq_netdev = dev; + if (is_valid_ether_addr(dev->dev_addr)) { + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); + } + netif_carrier_on(dev); + return 0; +} + +int igb_vmdq_close(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + netif_carrier_off(dev); + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + + vadapter->tx_ring->vmdq_netdev = NULL; + vadapter->rx_ring->vmdq_netdev = NULL; + return 0; +} + +netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + + return igb_xmit_frame_ring(skb, vadapter->tx_ring); +} + +struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vadapter->net_stats.rx_packets += + E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0); + vadapter->net_stats.tx_packets += + E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0); + vadapter->net_stats.rx_bytes += + E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0); + vadapter->net_stats.tx_bytes += + E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0); + vadapter->net_stats.multicast += + E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0); + /* only return the current stats */ + return &vadapter->net_stats; +} + +/** + * igb_write_vm_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +static int igb_write_vm_addr_list(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + int count = 0; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > igb_available_rars(adapter)) + return -ENOMEM; + + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + igb_del_mac_filter(adapter, ha->addr, hw_queue); + igb_add_mac_filter(adapter, ha->addr, hw_queue); +#else + igb_del_mac_filter(adapter, ha->da_addr, hw_queue); + igb_add_mac_filter(adapter, ha->da_addr, hw_queue); +#endif + count++; + } + } + return count; +} + + +#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous mode */ +void igb_vmdq_set_rx_mode(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + u32 vmolr, rctl; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* Check for Promiscuous and All Multicast modes */ + vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue)); + + /* clear the affected bits */ + vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME | + E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE); + + if (dev->flags & IFF_PROMISC) { + vmolr |= E1000_VMOLR_UPE; + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl |= E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } else { + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + if (dev->flags & IFF_ALLMULTI) { + vmolr |= E1000_VMOLR_MPME; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + if (igb_write_mc_addr_list(adapter->netdev) != 0) + vmolr |= E1000_VMOLR_ROMPE; + } +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (igb_write_vm_addr_list(dev) < 0) + vmolr |= E1000_VMOLR_UPE; +#endif + } + E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr); + + return; +} + +int igb_vmdq_set_mac(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); +} + +int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + + if (adapter->netdev->mtu < new_mtu) { + DPRINTK(PROBE, INFO, + "Set MTU on %s to >= %d " + "before changing MTU on %s\n", + adapter->netdev->name, new_mtu, dev->name); + return -EINVAL; + } + dev->mtu = new_mtu; + return 0; +} + +void igb_vmdq_tx_timeout(struct net_device *dev) +{ + return; +} + +void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vadapter->vlgrp = grp; + + igb_enable_vlan_tags(adapter); + E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0); + + return; +} +void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; +#ifndef HAVE_NETDEV_VLAN_FEATURES + struct net_device *v_netdev; +#endif + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* attempt to add filter to vlvf array */ + igb_vlvf_set(adapter, vid, TRUE, hw_queue); + +#ifndef HAVE_NETDEV_VLAN_FEATURES + + /* Copy feature flags from netdev to the vlan netdev for this vid. + * This allows things like TSO to bubble down to our vlan device. + */ + v_netdev = vlan_group_get_device(vadapter->vlgrp, vid); + v_netdev->features |= adapter->netdev->features; + vlan_group_set_device(vadapter->vlgrp, vid, v_netdev); +#endif + + return; +} +void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vlan_group_set_device(vadapter->vlgrp, vid, NULL); + /* remove vlan from VLVF table array */ + igb_vlvf_set(adapter, vid, FALSE, hw_queue); + + + return; +} + +static int igb_vmdq_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + u32 status; + + if (hw->phy.media_type == e1000_media_type_copper) { + + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Full| + SUPPORTED_Autoneg | + SUPPORTED_TP); + ecmd->advertising = ADVERTISED_TP; + + if (hw->mac.autoneg == 1) { + ecmd->advertising |= ADVERTISED_Autoneg; + /* the e1000 autoneg seems to match ethtool nicely */ + ecmd->advertising |= hw->phy.autoneg_advertised; + } + + ecmd->port = PORT_TP; + ecmd->phy_address = hw->phy.addr; + } else { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE | + SUPPORTED_Autoneg); + + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE | + ADVERTISED_Autoneg); + + ecmd->port = PORT_FIBRE; + } + + ecmd->transceiver = XCVR_INTERNAL; + + status = E1000_READ_REG(hw, E1000_STATUS); + + if (status & E1000_STATUS_LU) { + + if ((status & E1000_STATUS_SPEED_1000) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->speed = SPEED_1000; + else if (status & E1000_STATUS_SPEED_100) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + + if ((status & E1000_STATUS_FD) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->duplex = DUPLEX_FULL; + else + ecmd->duplex = DUPLEX_HALF; + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + return 0; +} + + +static u32 igb_vmdq_get_msglevel(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + return adapter->msg_enable; +} + +static void igb_vmdq_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct net_device *main_netdev = adapter->netdev; + + strncpy(drvinfo->driver, igb_driver_name, 32); + strncpy(drvinfo->version, igb_driver_version, 32); + + strncpy(drvinfo->fw_version, "N/A", 4); + snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name, + vadapter->rx_ring->queue_index); + drvinfo->n_stats = 0; + drvinfo->testinfo_len = 0; + drvinfo->regdump_len = 0; +} + +static void igb_vmdq_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + + struct igb_ring *tx_ring = vadapter->tx_ring; + struct igb_ring *rx_ring = vadapter->rx_ring; + + ring->rx_max_pending = IGB_MAX_RXD; + ring->tx_max_pending = IGB_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = rx_ring->count; + ring->tx_pending = tx_ring->count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} +static u32 igb_vmdq_get_rx_csum(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + + return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags); +} + + +static struct ethtool_ops igb_vmdq_ethtool_ops = { + .get_settings = igb_vmdq_get_settings, + .get_drvinfo = igb_vmdq_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = igb_vmdq_get_ringparam, + .get_rx_csum = igb_vmdq_get_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_msglevel = igb_vmdq_get_msglevel, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, +#endif +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif +}; + +void igb_vmdq_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops); +} + + +#endif /* CONFIG_IGB_VMDQ_NETDEV */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h new file mode 100644 index 00000000..e51e7c4e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h @@ -0,0 +1,46 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IGB_VMDQ_H_ +#define _IGB_VMDQ_H_ + +#ifdef CONFIG_IGB_VMDQ_NETDEV +int igb_vmdq_open(struct net_device *dev); +int igb_vmdq_close(struct net_device *dev); +netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev); +struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev); +void igb_vmdq_set_rx_mode(struct net_device *dev); +int igb_vmdq_set_mac(struct net_device *dev, void *addr); +int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu); +void igb_vmdq_tx_timeout(struct net_device *dev); +void igb_vmdq_vlan_rx_register(struct net_device *dev, + struct vlan_group *grp); +void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); +void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); +void igb_vmdq_set_ethtool_ops(struct net_device *netdev); +#endif /* CONFIG_IGB_VMDQ_NETDEV */ +#endif /* _IGB_VMDQ_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c new file mode 100644 index 00000000..bde3a83c --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c @@ -0,0 +1,1482 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "kcompat.h" + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +/* From lib/vsprintf.c */ +#include + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define _kc_ZEROPAD 1 /* pad with zero */ +#define _kc_SIGN 2 /* unsigned/signed long */ +#define _kc_PLUS 4 /* show plus */ +#define _kc_SPACE 8 /* space if plus */ +#define _kc_LEFT 16 /* left justified */ +#define _kc_SPECIAL 32 /* 0x */ +#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & _kc_LARGE) ? large_digits : small_digits; + if (type & _kc_LEFT) + type &= ~_kc_ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & _kc_ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & _kc_SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & _kc_PLUS) { + sign = '+'; + size--; + } else if (type & _kc_SPACE) { + sign = ' '; + size--; + } + } + if (type & _kc_SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & _kc_SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & _kc_LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= _kc_LEFT; goto repeat; + case '+': flags |= _kc_PLUS; goto repeat; + case ' ': flags |= _kc_SPACE; goto repeat; + case '#': flags |= _kc_SPECIAL; goto repeat; + case '0': flags |= _kc_ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= _kc_LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & _kc_LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & _kc_LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= _kc_ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= _kc_LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= _kc_SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & _kc_SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & _kc_SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & _kc_SIGN) + num = (signed int) num; + } + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = _kc_vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} +#endif /* < 2.4.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#if defined(CONFIG_HIGHMEM) + +#ifndef PCI_DRAM_OFFSET +#define PCI_DRAM_OFFSET 0 +#endif + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return (((u64) (page - mem_map) << PAGE_SHIFT) + offset + + PCI_DRAM_OFFSET); +} + +#else /* CONFIG_HIGHMEM */ + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return pci_map_single(dev, (void *)page_address(page) + offset, size, + direction); +} + +#endif /* CONFIG_HIGHMEM */ + +void +_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, + int direction) +{ + return pci_unmap_single(dev, dma_addr, size, direction); +} + +#endif /* 2.4.13 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +int +_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +{ + if (!pci_dma_supported(dev, mask)) + return -EIO; + dev->dma_mask = mask; + return 0; +} + +int +_kc_pci_request_regions(struct pci_dev *dev, char *res_name) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) { + if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { + if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } + } + return 0; +} + +void +_kc_pci_release_regions(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) + release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + + else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) + release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + } +} + +/**************************************/ +/* NETWORK DRIVER API */ + +struct net_device * +_kc_alloc_etherdev(int sizeof_priv) +{ + struct net_device *dev; + int alloc_size; + + alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; + dev = kzalloc(alloc_size, GFP_KERNEL); + if (!dev) + return NULL; + + if (sizeof_priv) + dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); + dev->name[0] = '\0'; + ether_setup(dev); + + return dev; +} + +int +_kc_is_valid_ether_addr(u8 *addr) +{ + const char zaddr[6] = { 0, }; + + return !(addr[0] & 1) && memcmp(addr, zaddr, 6); +} + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +int +_kc_pci_set_power_state(struct pci_dev *dev, int state) +{ + return 0; +} + +int +_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) +{ + return 0; +} + +#endif /* 2.4.6 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + frag->page = page; + frag->page_offset = off; + frag->size = size; + skb_shinfo(skb)->nr_frags = i + 1; +} + +/* + * Original Copyright: + * find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + ffs(tmp); +} + +size_t _kc_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#ifndef do_div +#if BITS_PER_LONG == 32 +uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} +#endif /* BITS_PER_LONG == 32 */ +#endif /* do_div */ +#endif /* 2.6.0 => 2.4.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + return (i >= size) ? (size - 1) : i; +} +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +char *_kc_kstrdup(const char *s, unsigned int gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +void *_kc_kzalloc(size_t size, int flags) +{ + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; +} +#endif /* <= 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) +int _kc_skb_pad(struct sk_buff *skb, int pad) +{ + int ntail; + + /* If the skbuff is non linear tailroom is always zero.. */ + if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { + memset(skb->data+skb->len, 0, pad); + return 0; + } + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); + goto free_skb; + } + +#ifdef MAX_SKB_FRAGS + if (skb_is_nonlinear(skb) && + !__pskb_pull_tail(skb, skb->data_len)) + goto free_skb; + +#endif + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: + kfree_skb(skb); + return -ENOMEM; +} + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +int _kc_pci_save_state(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct adapter_struct *adapter = netdev_priv(netdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset, pcie_link_status; + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) + /* no ->dev for 2.4 kernels */ + WARN_ON(pdev->dev.driver_data == NULL); +#endif + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset) { + if (!pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + } + pci_config_space_ich8lan(); +#ifdef HAVE_PCI_ERS + if (adapter->config_space == NULL) +#else + WARN_ON(adapter->config_space != NULL); +#endif + adapter->config_space = kmalloc(size, GFP_KERNEL); + if (!adapter->config_space) { + printk(KERN_ERR "Out of memory in pci_save_state\n"); + return -ENOMEM; + } + for (i = 0; i < (size / 4); i++) + pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); + return 0; +} + +void _kc_pci_restore_state(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct adapter_struct *adapter = netdev_priv(netdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset; + u16 pcie_link_status; + + if (adapter->config_space != NULL) { + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset && + !pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + + pci_config_space_ich8lan(); + for (i = 0; i < (size / 4); i++) + pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); +#ifndef HAVE_PCI_ERS + kfree(adapter->config_space); + adapter->config_space = NULL; +#endif + } +} +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +void _kc_free_netdev(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + + if (adapter->config_space != NULL) + kfree(adapter->config_space); +#ifdef CONFIG_SYSFS + if (netdev->reg_state == NETREG_UNINITIALIZED) { + kfree((char *)netdev - netdev->padded); + } else { + BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); + netdev->reg_state = NETREG_RELEASED; + class_device_put(&netdev->class_dev); + } +#else + kfree((char *)netdev - netdev->padded); +#endif +} +#endif + +void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) +{ + void *p; + + p = kzalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} +#endif /* <= 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev) +{ + return ((struct adapter_struct *)netdev_priv(netdev))->pdev; +} +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +/* hexdump code taken from lib/hexdump.c */ +static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, + int groupsize, unsigned char *linebuf, + size_t linebuflen, bool ascii) +{ + const u8 *ptr = buf; + u8 ch; + int j, lx = 0; + int ascii_column; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + if (!len) + goto nil; + if (len > rowsize) /* limit to one line at a time */ + len = rowsize; + if ((len % groupsize) != 0) /* no mixed size output */ + groupsize = 1; + + switch (groupsize) { + case 8: { + const u64 *ptr8 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + ascii_column = 17 * ngroups + 2; + break; + } + + case 4: { + const u32 *ptr4 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%8.8x", j ? " " : "", *(ptr4 + j)); + ascii_column = 9 * ngroups + 2; + break; + } + + case 2: { + const u16 *ptr2 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%4.4x", j ? " " : "", *(ptr2 + j)); + ascii_column = 5 * ngroups + 2; + break; + } + + default: + for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = hex_asc(ch >> 4); + linebuf[lx++] = hex_asc(ch & 0x0f); + linebuf[lx++] = ' '; + } + if (j) + lx--; + + ascii_column = 3 * rowsize + 2; + break; + } + if (!ascii) + goto nil; + + while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + linebuf[lx++] = ' '; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) + linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] + : '.'; +nil: + linebuf[lx++] = '\0'; +} + +void _kc_print_hex_dump(const char *level, + const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + unsigned char linebuf[200]; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len; i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + linebuf, sizeof(linebuf), ascii); + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + printk("%s%s%*p: %s\n", level, prefix_str, + (int)(2 * sizeof(void *)), ptr + i, linebuf); + break; + case DUMP_PREFIX_OFFSET: + printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); + break; + default: + printk("%s%s%s\n", level, prefix_str, linebuf); + break; + } + } +} + +#ifdef HAVE_I2C_SUPPORT +struct i2c_client * +_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) +{ + struct i2c_client *client; + int status; + + client = kzalloc(sizeof *client, GFP_KERNEL); + if (!client) + return NULL; + + client->adapter = adap; + + client->dev.platform_data = info->platform_data; + + client->flags = info->flags; + client->addr = info->addr; + + strlcpy(client->name, info->type, sizeof(client->name)); + + /* Check for address business */ + status = i2c_check_addr(adap, client->addr); + if (status) + goto out_err; + + client->dev.parent = &client->adapter->dev; + client->dev.bus = &i2c_bus_type; + + status = i2c_attach_client(client); + if (status) + goto out_err; + + dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", + client->name, dev_name(&client->dev)); + + return client; + +out_err: + dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x " + "(%d)\n", client->name, client->addr, status); + kfree(client); + return NULL; +} +#endif /* HAVE_I2C_SUPPORT */ +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifdef NAPI +struct net_device *napi_to_poll_dev(const struct napi_struct *napi) +{ + struct adapter_q_vector *q_vector = container_of(napi, + struct adapter_q_vector, + napi); + return &q_vector->poll_dev; +} + +int __kc_adapter_clean(struct net_device *netdev, int *budget) +{ + int work_done; + int work_to_do = min(*budget, netdev->quota); + /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ + struct napi_struct *napi = netdev->priv; + work_done = napi->poll(napi, work_to_do); + *budget -= work_done; + netdev->quota -= work_done; + return (work_done >= work_to_do) ? 1 : 0; +} +#endif /* NAPI */ +#endif /* <= 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) +{ + struct pci_dev *parent = pdev->bus->self; + u16 link_state; + int pos; + + if (!parent) + return; + + pos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (pos) { + pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); + link_state &= ~state; + pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); + } +} +#endif /* < 2.6.26 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +#ifdef HAVE_TX_MQ +void _kc_netif_tx_stop_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_stop_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +} +void _kc_netif_tx_wake_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_wake_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +} +void _kc_netif_tx_start_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_start_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_start_subqueue(netdev, i); +} +#endif /* HAVE_TX_MQ */ + +#ifndef __WARN_printf +void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) +{ + va_list args; + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + dump_stack(); +} +#endif /* __WARN_printf */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) + +int +_kc_pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state; + int error; + + target_state = pci_choose_state(dev, PMSG_SUSPEND); + + pci_enable_wake(dev, target_state, true); + + error = pci_set_power_state(dev, target_state); + + if (error) + pci_enable_wake(dev, target_state, false); + + return error; +} + +int +_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) +{ + int err; + + err = pci_enable_wake(dev, PCI_D3cold, enable); + if (err) + goto out; + + err = pci_enable_wake(dev, PCI_D3hot, enable); + +out: + return err; +} +#endif /* < 2.6.28 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +static void __kc_pci_set_master(struct pci_dev *pdev, bool enable) +{ + u16 old_cmd, cmd; + + pci_read_config_word(pdev, PCI_COMMAND, &old_cmd); + if (enable) + cmd = old_cmd | PCI_COMMAND_MASTER; + else + cmd = old_cmd & ~PCI_COMMAND_MASTER; + if (cmd != old_cmd) { + dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n", + enable ? "enabling" : "disabling"); + pci_write_config_word(pdev, PCI_COMMAND, cmd); + } +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ) + pdev->is_busmaster = enable; +#endif +} + +void _kc_pci_clear_master(struct pci_dev *dev) +{ + __kc_pci_set_master(dev, false); +} +#endif /* < 2.6.29 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +int _kc_pci_num_vf(struct pci_dev *dev) +{ + int num_vf = 0; +#ifdef CONFIG_PCI_IOV + struct pci_dev *vfdev; + + /* loop through all ethernet devices starting at PF dev */ + vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL); + while (vfdev) { + if (vfdev->is_virtfn && vfdev->physfn == dev) + num_vf++; + + vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev); + } + +#endif + return num_vf; +} +#endif /* RHEL_RELEASE_CODE */ +#endif /* < 2.6.34 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifdef HAVE_TX_MQ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + struct Qdisc *qdisc; + int i; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if ( txq < real_num) { + dev->real_num_tx_queues = txq; + for (i = txq; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } + } +} +#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ +#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ +#endif /* HAVE_TX_MQ */ + +ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, + const void __user *from, size_t count) +{ + loff_t pos = *ppos; + size_t res; + + if (pos < 0) + return -EINVAL; + if (pos >= available || !count) + return 0; + if (count > available - pos) + count = available - pos; + res = copy_from_user(to + pos, from, count); + if (res == count) + return -EFAULT; + count -= res; + *ppos = pos + count; + return count; +} + +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +static const u32 _kc_flags_dup_features = + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); + +u32 _kc_ethtool_op_get_flags(struct net_device *dev) +{ + return dev->features & _kc_flags_dup_features; +} + +int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) +{ + if (data & ~supported) + return -EINVAL; + + dev->features = ((dev->features & ~_kc_flags_dup_features) | + (data & _kc_flags_dup_features)); + return 0; +} +#endif /* < 2.6.36 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) + + + +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#endif /* < 2.6.39 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size, unsigned int truesize) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; +} + +int _kc_simple_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + + return 0; +} + +#endif /* < 3.4.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) +static inline int __kc_pcie_cap_version(struct pci_dev *dev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); + return reg16 & PCI_EXP_FLAGS_VERS; +} + +static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev) +{ + return true; +} + +static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_ENDPOINT || + type == PCI_EXP_TYPE_LEG_END; +} + +static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + int pos; + u16 pcie_flags_reg; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + (type == PCI_EXP_TYPE_DOWNSTREAM && + pcie_flags_reg & PCI_EXP_FLAGS_SLOT); +} + +static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC; +} + +static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos) +{ + if (!pci_is_pcie(dev)) + return false; + + switch (pos) { + case PCI_EXP_FLAGS_TYPE: + return true; + case PCI_EXP_DEVCAP: + case PCI_EXP_DEVCTL: + case PCI_EXP_DEVSTA: + return __kc_pcie_cap_has_devctl(dev); + case PCI_EXP_LNKCAP: + case PCI_EXP_LNKCTL: + case PCI_EXP_LNKSTA: + return __kc_pcie_cap_has_lnkctl(dev); + case PCI_EXP_SLTCAP: + case PCI_EXP_SLTCTL: + case PCI_EXP_SLTSTA: + return __kc_pcie_cap_has_sltctl(dev); + case PCI_EXP_RTCTL: + case PCI_EXP_RTCAP: + case PCI_EXP_RTSTA: + return __kc_pcie_cap_has_rtctl(dev); + case PCI_EXP_DEVCAP2: + case PCI_EXP_DEVCTL2: + case PCI_EXP_LNKCAP2: + case PCI_EXP_LNKCTL2: + case PCI_EXP_LNKSTA2: + return __kc_pcie_cap_version(dev) > 1; + default: + return false; + } +} + +/* + * Note that these accessor functions are only for the "PCI Express + * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the + * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.) + */ +int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val) +{ + int ret; + + *val = 0; + if (pos & 1) + return -EINVAL; + + if (__kc_pcie_capability_reg_implemented(dev, pos)) { + ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val); + /* + * Reset *val to 0 if pci_read_config_word() fails, it may + * have been written as 0xFFFF if hardware error happens + * during pci_read_config_word(). + */ + if (ret) + *val = 0; + return ret; + } + + /* + * For Functions that do not implement the Slot Capabilities, + * Slot Status, and Slot Control registers, these spaces must + * be hardwired to 0b, with the exception of the Presence Detect + * State bit in the Slot Status register of Downstream Ports, + * which must be hardwired to 1b. (PCIe Base Spec 3.0, sec 7.8) + */ + if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA && + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) { + *val = PCI_EXP_SLTSTA_PDS; + } + + return 0; +} + +int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) +{ + if (pos & 1) + return -EINVAL; + + if (!__kc_pcie_capability_reg_implemented(dev, pos)) + return 0; + + return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); +} + +int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, + u16 clear, u16 set) +{ + int ret; + u16 val; + + ret = __kc_pcie_capability_read_word(dev, pos, &val); + if (!ret) { + val &= ~clear; + val |= set; + ret = __kc_pcie_capability_write_word(dev, pos, val); + } + + return ret; +} +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */ +#endif /* < 3.7.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) +#endif /* 3.9.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + unsigned int vfs_assigned = 0; +#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED + int pos; + struct pci_dev *vfdev; + unsigned short dev_id; + + /* only search if we are a PF */ + if (!dev->is_physfn) + return 0; + + /* find SR-IOV capability */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return 0; + + /* + * determine the device ID for the VFs, the vendor ID will be the + * same as the PF so there is no need to check for that one + */ + pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); + + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(dev->vendor, dev_id, NULL); + while (vfdev) { + /* + * It is considered assigned if it is a virtual function with + * our dev as the physical function and the assigned bit is set + */ + if (vfdev->is_virtfn && (vfdev->physfn == dev) && + (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) + vfs_assigned++; + + vfdev = pci_get_device(dev->vendor, dev_id, vfdev); + } + +#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ + return vfs_assigned; +} + +#endif /* CONFIG_PCI_IOV */ +#endif /* 3.10.0 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h new file mode 100644 index 00000000..e2cf71e0 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -0,0 +1,3918 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _KCOMPAT_H_ +#define _KCOMPAT_H_ + +#ifndef LINUX_VERSION_CODE +#include +#else +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* NAPI enable/disable flags here */ +#define NAPI + +#define adapter_struct igb_adapter +#define adapter_q_vector igb_q_vector +#define NAPI + +/* and finally set defines so that the code sees the changes */ +#ifdef NAPI +#else +#endif /* NAPI */ + +/* packet split disable/enable */ +#ifdef DISABLE_PACKET_SPLIT +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +#define CONFIG_IGB_DISABLE_PACKET_SPLIT +#endif +#endif /* DISABLE_PACKET_SPLIT */ + +/* MSI compatibility code for all kernels and drivers */ +#ifdef DISABLE_PCI_MSI +#undef CONFIG_PCI_MSI +#endif +#ifndef CONFIG_PCI_MSI +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +struct msix_entry { + u16 vector; /* kernel uses to write allocated vector */ + u16 entry; /* driver uses to specify entry, OS writes */ +}; +#endif +#undef pci_enable_msi +#define pci_enable_msi(a) -ENOTSUPP +#undef pci_disable_msi +#define pci_disable_msi(a) do {} while (0) +#undef pci_enable_msix +#define pci_enable_msix(a, b, c) -ENOTSUPP +#undef pci_disable_msix +#define pci_disable_msix(a) do {} while (0) +#define msi_remove_pci_irq_vectors(a) do {} while (0) +#endif /* CONFIG_PCI_MSI */ +#ifdef DISABLE_PM +#undef CONFIG_PM +#endif + +#ifdef DISABLE_NET_POLL_CONTROLLER +#undef CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef PMSG_SUSPEND +#define PMSG_SUSPEND 3 +#endif + +/* generic boolean compatibility */ +#undef TRUE +#undef FALSE +#define TRUE true +#define FALSE false +#ifdef GCC_VERSION +#if ( GCC_VERSION < 3000 ) +#define _Bool char +#endif +#else +#define _Bool char +#endif + +/* kernels less than 2.4.14 don't have this */ +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef module_param +#define module_param(v,t,p) MODULE_PARM(v, "i"); +#endif + +#ifndef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#endif + +#ifndef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0x00000000ffffffffULL +#endif + +#ifndef PCI_CAP_ID_EXP +#define PCI_CAP_ID_EXP 0x10 +#endif + +#ifndef PCIE_LINK_STATE_L0S +#define PCIE_LINK_STATE_L0S 1 +#endif +#ifndef PCIE_LINK_STATE_L1 +#define PCIE_LINK_STATE_L1 2 +#endif + +#ifndef mmiowb +#ifdef CONFIG_IA64 +#define mmiowb() asm volatile ("mf.a" ::: "memory") +#else +#define mmiowb() +#endif +#endif + +#ifndef SET_NETDEV_DEV +#define SET_NETDEV_DEV(net, pdev) +#endif + +#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#define free_netdev(x) kfree(x) +#endif + +#ifdef HAVE_POLL_CONTROLLER +#define CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef SKB_DATAREF_SHIFT +/* if we do not have the infrastructure to detect if skb_header is cloned + just return false in all cases */ +#define skb_header_cloned(x) 0 +#endif + +#ifndef NETIF_F_GSO +#define gso_size tso_size +#define gso_segs tso_segs +#endif + +#ifndef NETIF_F_GRO +#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ + vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) +#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) +#endif + +#ifndef NETIF_F_SCTP_CSUM +#define NETIF_F_SCTP_CSUM 0 +#endif + +#ifndef NETIF_F_LRO +#define NETIF_F_LRO (1 << 15) +#endif + +#ifndef NETIF_F_NTUPLE +#define NETIF_F_NTUPLE (1 << 27) +#endif + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif + +#ifndef CHECKSUM_PARTIAL +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW +#endif + +#ifndef __read_mostly +#define __read_mostly +#endif + +#ifndef MII_RESV1 +#define MII_RESV1 0x17 /* Reserved... */ +#endif + +#ifndef unlikely +#define unlikely(_x) _x +#define likely(_x) _x +#endif + +#ifndef WARN_ON +#define WARN_ON(x) +#endif + +#ifndef PCI_DEVICE +#define PCI_DEVICE(vend,dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +#endif + +#ifndef node_online +#define node_online(node) ((node) == 0) +#endif + +#ifndef num_online_cpus +#define num_online_cpus() smp_num_cpus +#endif + +#ifndef cpu_online +#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) +#endif + +#ifndef _LINUX_RANDOM_H +#include +#endif + +#ifndef DECLARE_BITMAP +#ifndef BITS_TO_LONGS +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#endif +#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] +#endif + +#ifndef VLAN_HLEN +#define VLAN_HLEN 4 +#endif + +#ifndef VLAN_ETH_HLEN +#define VLAN_ETH_HLEN 18 +#endif + +#ifndef VLAN_ETH_FRAME_LEN +#define VLAN_ETH_FRAME_LEN 1518 +#endif + +#if !defined(IXGBE_DCA) && !defined(IGB_DCA) +#define dca_get_tag(b) 0 +#define dca_add_requester(a) -1 +#define dca_remove_requester(b) do { } while(0) +#define DCA_PROVIDER_ADD 0x0001 +#define DCA_PROVIDER_REMOVE 0x0002 +#endif + +#ifndef DCA_GET_TAG_TWO_ARGS +#define dca3_get_tag(a,b) dca_get_tag(b) +#endif + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#if defined(__i386__) || defined(__x86_64__) +#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#endif +#endif + +/* taken from 2.6.24 definition in linux/kernel.h */ +#ifndef IS_ALIGNED +#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) +#endif + +#ifdef IS_ENABLED +#undef IS_ENABLED +#undef __ARG_PLACEHOLDER_1 +#undef config_enabled +#undef _config_enabled +#undef __config_enabled +#undef ___config_enabled +#endif + +#define __ARG_PLACEHOLDER_1 0, +#define config_enabled(cfg) _config_enabled(cfg) +#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value) +#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0) +#define ___config_enabled(__ignored, val, ...) val + +#define IS_ENABLED(option) \ + (config_enabled(option) || config_enabled(option##_MODULE)) + +#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX) +struct _kc_vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_ethhdr _kc_vlan_ethhdr +struct _kc_vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_hdr _kc_vlan_hdr +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#define vlan_tx_tag_present(_skb) 0 +#define vlan_tx_tag_get(_skb) 0 +#endif +#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */ + +#ifndef VLAN_PRIO_SHIFT +#define VLAN_PRIO_SHIFT 13 +#endif + + +#ifndef __GFP_COLD +#define __GFP_COLD 0 +#endif + +#ifndef __GFP_COMP +#define __GFP_COMP 0 +#endif + +/*****************************************************************************/ +/* Installations with ethtool version without eeprom, adapter id, or statistics + * support */ + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x1d +#undef ethtool_drvinfo +#define ethtool_drvinfo k_ethtool_drvinfo +struct k_ethtool_drvinfo { + u32 cmd; + char driver[32]; + char version[32]; + char fw_version[32]; + char bus_info[32]; + char reserved1[32]; + char reserved2[16]; + u32 n_stats; + u32 testinfo_len; + u32 eedump_len; + u32 regdump_len; +}; + +struct ethtool_stats { + u32 cmd; + u32 n_stats; + u64 data[0]; +}; +#endif /* ETHTOOL_GSTATS */ + +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x1c +#endif /* ETHTOOL_PHYS_ID */ + +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x1b +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, +}; +struct ethtool_gstrings { + u32 cmd; /* ETHTOOL_GSTRINGS */ + u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ + u32 len; /* number of strings in the string set */ + u8 data[0]; +}; +#endif /* ETHTOOL_GSTRINGS */ + +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x1a +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), +}; +struct ethtool_test { + u32 cmd; + u32 flags; + u32 reserved; + u32 len; + u64 data[0]; +}; +#endif /* ETHTOOL_TEST */ + +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0xb +#undef ETHTOOL_GREGS +struct ethtool_eeprom { + u32 cmd; + u32 magic; + u32 offset; + u32 len; + u8 data[0]; +}; + +struct ethtool_value { + u32 cmd; + u32 data; +}; +#endif /* ETHTOOL_GEEPROM */ + +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0xa +#endif /* ETHTOOL_GLINK */ + +#ifndef ETHTOOL_GWOL +#define ETHTOOL_GWOL 0x5 +#define ETHTOOL_SWOL 0x6 +#define SOPASS_MAX 6 +struct ethtool_wolinfo { + u32 cmd; + u32 supported; + u32 wolopts; + u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ +}; +#endif /* ETHTOOL_GWOL */ + +#ifndef ETHTOOL_GREGS +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ +#define ethtool_regs _kc_ethtool_regs +/* for passing big chunks of data */ +struct _kc_ethtool_regs { + u32 cmd; + u32 version; /* driver-specific, indicates different chips/revs */ + u32 len; /* bytes */ + u8 data[0]; +}; +#endif /* ETHTOOL_GREGS */ + +#ifndef ETHTOOL_GMSGLVL +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#endif +#ifndef ETHTOOL_SMSGLVL +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ +#endif +#ifndef ETHTOOL_NWAY_RST +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ +#endif +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0x0000000a /* Get link status */ +#endif +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#endif +#ifndef ETHTOOL_SEEPROM +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ +#endif +#ifndef ETHTOOL_GCOALESCE +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +/* for configuring coalescing parameters of chip */ +#define ethtool_coalesce _kc_ethtool_coalesce +struct _kc_ethtool_coalesce { + u32 cmd; /* ETHTOOL_{G,S}COALESCE */ + + /* How many usecs to delay an RX interrupt after + * a packet arrives. If 0, only rx_max_coalesced_frames + * is used. + */ + u32 rx_coalesce_usecs; + + /* How many packets to delay an RX interrupt after + * a packet arrives. If 0, only rx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause RX interrupts to never be + * generated. + */ + u32 rx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 rx_coalesce_usecs_irq; + u32 rx_max_coalesced_frames_irq; + + /* How many usecs to delay a TX interrupt after + * a packet is sent. If 0, only tx_max_coalesced_frames + * is used. + */ + u32 tx_coalesce_usecs; + + /* How many packets to delay a TX interrupt after + * a packet is sent. If 0, only tx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause TX interrupts to never be + * generated. + */ + u32 tx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 tx_coalesce_usecs_irq; + u32 tx_max_coalesced_frames_irq; + + /* How many usecs to delay in-memory statistics + * block updates. Some drivers do not have an in-memory + * statistic block, and in such cases this value is ignored. + * This value must not be zero. + */ + u32 stats_block_coalesce_usecs; + + /* Adaptive RX/TX coalescing is an algorithm implemented by + * some drivers to improve latency under low packet rates and + * improve throughput under high packet rates. Some drivers + * only implement one of RX or TX adaptive coalescing. Anything + * not implemented by the driver causes these values to be + * silently ignored. + */ + u32 use_adaptive_rx_coalesce; + u32 use_adaptive_tx_coalesce; + + /* When the packet rate (measured in packets per second) + * is below pkt_rate_low, the {rx,tx}_*_low parameters are + * used. + */ + u32 pkt_rate_low; + u32 rx_coalesce_usecs_low; + u32 rx_max_coalesced_frames_low; + u32 tx_coalesce_usecs_low; + u32 tx_max_coalesced_frames_low; + + /* When the packet rate is below pkt_rate_high but above + * pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ + + /* When the packet rate is (measured in packets per second) + * is above pkt_rate_high, the {rx,tx}_*_high parameters are + * used. + */ + u32 pkt_rate_high; + u32 rx_coalesce_usecs_high; + u32 rx_max_coalesced_frames_high; + u32 tx_coalesce_usecs_high; + u32 tx_max_coalesced_frames_high; + + /* How often to do adaptive coalescing packet rate sampling, + * measured in seconds. Must not be zero. + */ + u32 rate_sample_interval; +}; +#endif /* ETHTOOL_GCOALESCE */ + +#ifndef ETHTOOL_SCOALESCE +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#endif +#ifndef ETHTOOL_GRINGPARAM +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +/* for configuring RX/TX ring parameters */ +#define ethtool_ringparam _kc_ethtool_ringparam +struct _kc_ethtool_ringparam { + u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ + + /* Read only attributes. These indicate the maximum number + * of pending RX/TX ring entries the driver will allow the + * user to set. + */ + u32 rx_max_pending; + u32 rx_mini_max_pending; + u32 rx_jumbo_max_pending; + u32 tx_max_pending; + + /* Values changeable by the user. The valid values are + * in the range 1 to the "*_max_pending" counterpart above. + */ + u32 rx_pending; + u32 rx_mini_pending; + u32 rx_jumbo_pending; + u32 tx_pending; +}; +#endif /* ETHTOOL_GRINGPARAM */ + +#ifndef ETHTOOL_SRINGPARAM +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ +#endif +#ifndef ETHTOOL_GPAUSEPARAM +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +/* for configuring link flow control parameters */ +#define ethtool_pauseparam _kc_ethtool_pauseparam +struct _kc_ethtool_pauseparam { + u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ + + /* If the link is being auto-negotiated (via ethtool_cmd.autoneg + * being true) the user may set 'autoneg' here non-zero to have the + * pause parameters be auto-negotiated too. In such a case, the + * {rx,tx}_pause values below determine what capabilities are + * advertised. + * + * If 'autoneg' is zero or the link is not being auto-negotiated, + * then {rx,tx}_pause force the driver to use/not-use pause + * flow control. + */ + u32 autoneg; + u32 rx_pause; + u32 tx_pause; +}; +#endif /* ETHTOOL_GPAUSEPARAM */ + +#ifndef ETHTOOL_SPAUSEPARAM +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#endif +#ifndef ETHTOOL_GRXCSUM +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_SRXCSUM +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GTXCSUM +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STXCSUM +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GSG +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#endif +#ifndef ETHTOOL_SSG +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#endif +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ +#endif +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#endif +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#endif +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#endif +#ifndef ETHTOOL_GTSO +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STSO +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#endif + +#ifndef ETHTOOL_BUSINFO_LEN +#define ETHTOOL_BUSINFO_LEN 32 +#endif + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif +#ifndef AX_RELEASE_VERSION +#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif + +#ifndef AX_RELEASE_CODE +#define AX_RELEASE_CODE 0 +#endif + +#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0) +#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1) +#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3) +#endif + +#ifndef RHEL_RELEASE_CODE +/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ +#define RHEL_RELEASE_CODE 0 +#endif + +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) +#endif +#ifdef CONFIG_SUSE_KERNEL +#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0))) +/* SLES11 SP3 is at least 3.0.61+ based */ +#define SLE_VERSION_CODE SLE_VERSION(11,3,0) +#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12,0,0) +#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ +#endif /* CONFIG_SUSE_KERNEL */ +#ifndef SLE_VERSION_CODE +#define SLE_VERSION_CODE 0 +#endif /* SLE_VERSION_CODE */ + +/* Ubuntu release and kernel codes must be specified from Makefile */ +#ifndef UBUNTU_RELEASE_VERSION +#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b)) +#endif +#ifndef UBUNTU_KERNEL_VERSION +#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload)) +#endif +#ifndef UBUNTU_RELEASE_CODE +#define UBUNTU_RELEASE_CODE 0 +#endif +#ifndef UBUNTU_KERNEL_CODE +#define UBUNTU_KERNEL_CODE 0 +#endif + +#ifdef __KLOCWORK__ +#ifdef ARRAY_SIZE +#undef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif +#endif /* __KLOCWORK__ */ + +/*****************************************************************************/ +/* 2.4.3 => 2.4.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +#ifndef pci_set_dma_mask +#define pci_set_dma_mask _kc_pci_set_dma_mask +extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +#endif + +#ifndef pci_request_regions +#define pci_request_regions _kc_pci_request_regions +extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name); +#endif + +#ifndef pci_release_regions +#define pci_release_regions _kc_pci_release_regions +extern void _kc_pci_release_regions(struct pci_dev *pdev); +#endif + +/**************************************/ +/* NETWORK DRIVER API */ + +#ifndef alloc_etherdev +#define alloc_etherdev _kc_alloc_etherdev +extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); +#endif + +#ifndef is_valid_ether_addr +#define is_valid_ether_addr _kc_is_valid_ether_addr +extern int _kc_is_valid_ether_addr(u8 *addr); +#endif + +/**************************************/ +/* MISCELLANEOUS */ + +#ifndef INIT_TQUEUE +#define INIT_TQUEUE(_tq, _routine, _data) \ + do { \ + INIT_LIST_HEAD(&(_tq)->list); \ + (_tq)->sync = 0; \ + (_tq)->routine = _routine; \ + (_tq)->data = _data; \ + } while (0) +#endif + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) +/* Generic MII registers. */ +#define MII_BMCR 0x00 /* Basic mode control register */ +#define MII_BMSR 0x01 /* Basic mode status register */ +#define MII_PHYSID1 0x02 /* PHYS ID 1 */ +#define MII_PHYSID2 0x03 /* PHYS ID 2 */ +#define MII_ADVERTISE 0x04 /* Advertisement control reg */ +#define MII_LPA 0x05 /* Link partner ability reg */ +#define MII_EXPANSION 0x06 /* Expansion register */ +/* Basic mode control register. */ +#define BMCR_FULLDPLX 0x0100 /* Full duplex */ +#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ +/* Basic mode status register. */ +#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ +#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ +#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ +#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ +#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ +#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ +/* Advertisement control register. */ +#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ + ADVERTISE_100HALF | ADVERTISE_100FULL) +/* Expansion register for auto-negotiation. */ +#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ +#endif + +/*****************************************************************************/ +/* 2.4.6 => 2.4.3 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +#ifndef pci_set_power_state +#define pci_set_power_state _kc_pci_set_power_state +extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); +#endif + +#ifndef pci_enable_wake +#define pci_enable_wake _kc_pci_enable_wake +extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); +#endif + +#ifndef pci_disable_device +#define pci_disable_device _kc_pci_disable_device +extern void _kc_pci_disable_device(struct pci_dev *pdev); +#endif + +/* PCI PM entry point syntax changed, so don't support suspend/resume */ +#undef CONFIG_PM + +#endif /* 2.4.6 => 2.4.3 */ + +#ifndef HAVE_PCI_SET_MWI +#define pci_set_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ + PCI_COMMAND_INVALIDATE); +#define pci_clear_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ + ~PCI_COMMAND_INVALIDATE); +#endif + +/*****************************************************************************/ +/* 2.4.10 => 2.4.9 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) + +/**************************************/ +/* MODULE API */ + +#ifndef MODULE_LICENSE + #define MODULE_LICENSE(X) +#endif + +/**************************************/ +/* OTHER */ + +#undef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#undef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define min_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x < _y ? _x : _y; }) + +#define max_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x > _y ? _x : _y; }) + +#ifndef list_for_each_safe +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) +#endif + +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); +#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args) +extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) +#else /* 2.4.8 => 2.4.9 */ +extern int snprintf(char * buf, size_t size, const char *fmt, ...); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#endif +#endif /* 2.4.10 -> 2.4.6 */ + + +/*****************************************************************************/ +/* 2.4.12 => 2.4.10 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) +#ifndef HAVE_NETIF_MSG +#define HAVE_NETIF_MSG 1 +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, + NETIF_MSG_HW = 0x2000, + NETIF_MSG_WOL = 0x4000, +}; + +#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) +#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) +#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) +#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) +#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) +#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) +#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) +#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) +#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) +#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) +#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) +#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) +#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) +#endif /* !HAVE_NETIF_MSG */ +#endif /* 2.4.12 => 2.4.10 */ + +/*****************************************************************************/ +/* 2.4.13 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#ifndef virt_to_page + #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) +#endif + +#ifndef pci_map_page +#define pci_map_page _kc_pci_map_page +extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); +#endif + +#ifndef pci_unmap_page +#define pci_unmap_page _kc_pci_unmap_page +extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); +#endif + +/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ + +#undef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0xffffffff +#undef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffff + +/**************************************/ +/* OTHER */ + +#ifndef cpu_relax +#define cpu_relax() rep_nop() +#endif + +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_vlan_proto; + unsigned short h_vlan_TCI; + unsigned short h_vlan_encapsulated_proto; +}; +#endif /* 2.4.13 => 2.4.12 */ + +/*****************************************************************************/ +/* 2.4.17 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) + +#ifndef __devexit_p + #define __devexit_p(x) &(x) +#endif + +#else + /* For Kernel 3.8 these are not defined - so undefine all */ + #undef __devexit_p + #undef __devexit + #undef __devinit + #undef __devinitdata + #define __devexit_p(x) &(x) + #define __devexit + #define __devinit + #define __devinitdata + +#endif /* 2.4.17 => 2.4.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) +#define NETIF_MSG_HW 0x2000 +#define NETIF_MSG_WOL 0x4000 + +#ifndef netif_msg_hw +#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) +#endif +#ifndef netif_msg_wol +#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) +#endif +#endif /* 2.4.18 */ + +/*****************************************************************************/ + +/*****************************************************************************/ +/* 2.4.20 => 2.4.19 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) + +/* we won't support NAPI on less than 2.4.20 */ +#ifdef NAPI +#undef NAPI +#endif + +#endif /* 2.4.20 => 2.4.19 */ + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#define pci_name(x) ((x)->slot_name) + +#ifndef SUPPORTED_10000baseT_Full +#define SUPPORTED_10000baseT_Full (1 << 12) +#endif +#ifndef ADVERTISED_10000baseT_Full +#define ADVERTISED_10000baseT_Full (1 << 12) +#endif +#endif + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#ifndef IGB_NO_LRO +#define IGB_NO_LRO +#endif +#endif + +/*****************************************************************************/ +/*****************************************************************************/ +/* 2.4.23 => 2.4.22 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) +/*****************************************************************************/ +#ifdef NAPI +#ifndef netif_poll_disable +#define netif_poll_disable(x) _kc_netif_poll_disable(x) +static inline void _kc_netif_poll_disable(struct net_device *netdev) +{ + while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { + /* No hurry */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } +} +#endif +#ifndef netif_poll_enable +#define netif_poll_enable(x) _kc_netif_poll_enable(x) +static inline void _kc_netif_poll_enable(struct net_device *netdev) +{ + clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); +} +#endif +#endif /* NAPI */ +#ifndef netif_tx_disable +#define netif_tx_disable(x) _kc_netif_tx_disable(x) +static inline void _kc_netif_tx_disable(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + netif_stop_queue(dev); + spin_unlock_bh(&dev->xmit_lock); +} +#endif +#else /* 2.4.23 => 2.4.22 */ +#define HAVE_SCTP +#endif /* 2.4.23 => 2.4.22 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) +#define ETHTOOL_OPS_COMPAT +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) +#define __user +#endif /* < 2.4.27 */ + +/*****************************************************************************/ +/* 2.5.71 => 2.4.x */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) +#define sk_protocol protocol +#define pci_get_device pci_find_device +#endif /* 2.5.70 => 2.4.x */ + +/*****************************************************************************/ +/* < 2.4.27 or 2.6.0 <= 2.6.5 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) + +#ifndef netif_msg_init +#define netif_msg_init _kc_netif_msg_init +static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) +{ + /* use default */ + if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) + return default_msg_enable_bits; + if (debug_value == 0) /* no output */ + return 0; + /* set low N bits */ + return (1 << debug_value) -1; +} +#endif + +#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ +/*****************************************************************************/ +#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ + (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ + ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) +#define netdev_priv(x) x->priv +#endif + +/*****************************************************************************/ +/* <= 2.5.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) +#include +#undef pci_register_driver +#define pci_register_driver pci_module_init + +/* + * Most of the dma compat code is copied/modifed from the 2.4.37 + * /include/linux/libata-compat.h header file + */ +/* These definitions mirror those in pci.h, so they can be used + * interchangeably with their PCI_ counterparts */ +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +struct device { + struct pci_dev pdev; +}; + +static inline struct pci_dev *to_pci_dev (struct device *dev) +{ + return (struct pci_dev *) dev; +} +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return (struct device *) pdev; +} + +#define pdev_printk(lvl, pdev, fmt, args...) \ + printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) +#define dev_err(dev, fmt, args...) \ + pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) +#define dev_info(dev, fmt, args...) \ + pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) +#define dev_warn(dev, fmt, args...) \ + pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) +#define dev_notice(dev, fmt, args...) \ + pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args) +#define dev_dbg(dev, fmt, args...) \ + pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args) + +/* NOTE: dangerous! we ignore the 'gfp' argument */ +#define dma_alloc_coherent(dev,sz,dma,gfp) \ + pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) +#define dma_free_coherent(dev,sz,addr,dma_addr) \ + pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) + +#define dma_map_page(dev,a,b,c,d) \ + pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) +#define dma_unmap_page(dev,a,b,c) \ + pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_single(dev,a,b,c) \ + pci_map_single(to_pci_dev(dev),(a),(b),(c)) +#define dma_unmap_single(dev,a,b,c) \ + pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_sg(dev, sg, nents, dir) \ + pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir) +#define dma_unmap_sg(dev, sg, nents, dir) \ + pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir) + +#define dma_sync_single(dev,a,b,c) \ + pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) + +/* for range just sync everything, that's all the pci API can do */ +#define dma_sync_single_range(dev,addr,off,sz,dir) \ + pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) + +#define dma_set_mask(dev,mask) \ + pci_set_dma_mask(to_pci_dev(dev),(mask)) + +/* hlist_* code - double linked lists */ +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +#ifndef might_sleep +#define might_sleep() +#endif +#else +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return &pdev->dev; +} +#endif /* <= 2.5.0 */ + +/*****************************************************************************/ +/* 2.5.28 => 2.4.23 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + +#include +#define work_struct tq_struct +#undef INIT_WORK +#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) +#undef container_of +#define container_of list_entry +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks +#define cancel_work_sync(x) flush_scheduled_work() + +#endif /* 2.5.28 => 2.4.17 */ + +/*****************************************************************************/ +/* 2.6.0 => 2.5.28 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#ifndef read_barrier_depends +#define read_barrier_depends() rmb() +#endif + +#undef get_cpu +#define get_cpu() smp_processor_id() +#undef put_cpu +#define put_cpu() do { } while(0) +#define MODULE_INFO(version, _version) +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT +#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 +#endif +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 +#endif + +#define dma_set_coherent_mask(dev,mask) 1 + +#undef dev_put +#define dev_put(dev) __dev_put(dev) + +#ifndef skb_fill_page_desc +#define skb_fill_page_desc _kc_skb_fill_page_desc +extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); +#endif + +#undef ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +#ifndef page_count +#define page_count(p) atomic_read(&(p)->count) +#endif + +#ifdef MAX_NUMNODES +#undef MAX_NUMNODES +#endif +#define MAX_NUMNODES 1 + +/* find_first_bit and find_next bit are not defined for most + * 2.4 kernels (except for the redhat 2.4.21 kernels + */ +#include +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#undef find_next_bit +#define find_next_bit _kc_find_next_bit +extern unsigned long _kc_find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) + + +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (strchr(dev->name, '%')) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#ifndef strlcpy +#define strlcpy _kc_strlcpy +extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); +#endif /* strlcpy */ + +#ifndef do_div +#if BITS_PER_LONG == 64 +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + __rem = ((uint64_t)(n)) % __base; \ + (n) = ((uint64_t)(n)) / __base; \ + __rem; \ + }) +#elif BITS_PER_LONG == 32 +extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor); +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + if (likely(((n) >> 32) == 0)) { \ + __rem = (uint32_t)(n) % __base; \ + (n) = (uint32_t)(n) / __base; \ + } else \ + __rem = _kc__div64_32(&(n), __base); \ + __rem; \ + }) +#else /* BITS_PER_LONG == ?? */ +# error do_div() does not yet support the C64 +#endif /* BITS_PER_LONG */ +#endif /* do_div */ + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000L +#endif + +#undef HAVE_I2C_SUPPORT +#else /* 2.6.0 */ +#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \ + (RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9))) +#define HAVE_I2C_SUPPORT +#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */ + +#endif /* 2.6.0 => 2.5.28 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ) +#define dma_pool pci_pool +#define dma_pool_destroy pci_pool_destroy +#define dma_pool_alloc pci_pool_alloc +#define dma_pool_free pci_pool_free + +#define dma_pool_create(name,dev,size,align,allocation) \ + pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation)) +#endif /* < 2.6.3 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +#define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.6.5 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) +#define dma_sync_single_for_cpu dma_sync_single +#define dma_sync_single_for_device dma_sync_single +#define dma_sync_single_range_for_cpu dma_sync_single_range +#define dma_sync_single_range_for_device dma_sync_single_range +#ifndef pci_dma_mapping_error +#define pci_dma_mapping_error _kc_pci_dma_mapping_error +static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) +{ + return dma_addr == 0; +} +#endif +#endif /* 2.6.5 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); +#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args) +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) +/* taken from 2.6 include/linux/bitmap.h */ +#undef bitmap_zero +#define bitmap_zero _kc_bitmap_zero +static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} +#define random_ether_addr _kc_random_ether_addr +static inline void _kc_random_ether_addr(u8 *addr) +{ + get_random_bytes(addr, ETH_ALEN); + addr[0] &= 0xfe; /* clear multicast */ + addr[0] |= 0x02; /* set local assignment */ +} +#define page_to_nid(x) 0 + +#endif /* < 2.6.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) +#undef if_mii +#define if_mii _kc_if_mii +static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} + +#ifndef __force +#define __force +#endif +#endif /* < 2.6.7 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +#ifndef PCI_EXP_DEVCTL +#define PCI_EXP_DEVCTL 8 +#endif +#ifndef PCI_EXP_DEVCTL_CERE +#define PCI_EXP_DEVCTL_CERE 0x0001 +#endif +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((x * HZ)/1000 + 2); \ + } while (0) + +#endif /* < 2.6.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) +#include +#define __iomem + +#ifndef kcalloc +#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) +extern void *_kc_kzalloc(size_t size, int flags); +#endif +#define MSEC_PER_SEC 1000L +static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); +#else + return (j * MSEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return m * (HZ / MSEC_PER_SEC); +#else + return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; +#endif +} + +#define msleep_interruptible _kc_msleep_interruptible +static inline unsigned long _kc_msleep_interruptible(unsigned int msecs) +{ + unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; + + while (timeout && !signal_pending(current)) { + __set_current_state(TASK_INTERRUPTIBLE); + timeout = schedule_timeout(timeout); + } + return _kc_jiffies_to_msecs(timeout); +} + +/* Basic mode control register. */ +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +#ifndef __be16 +#define __be16 u16 +#endif +#ifndef __be32 +#define __be32 u32 +#endif +#ifndef __be64 +#define __be64 u64 +#endif + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb->mac.raw; +} + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ + +#define skb_header_pointer _kc_skb_header_pointer +static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, + int offset, int len, void *buffer) +{ + int hlen = skb_headlen(skb); + + if (hlen - offset >= len) + return skb->data + offset; + +#ifdef MAX_SKB_FRAGS + if (skb_copy_bits(skb, offset, buffer, len) < 0) + return NULL; + + return buffer; +#else + return NULL; +#endif + +#ifndef NETDEV_TX_OK +#define NETDEV_TX_OK 0 +#endif +#ifndef NETDEV_TX_BUSY +#define NETDEV_TX_BUSY 1 +#endif +#ifndef NETDEV_TX_LOCKED +#define NETDEV_TX_LOCKED -1 +#endif +} + +#ifndef __bitwise +#define __bitwise +#endif +#endif /* < 2.6.9 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +#ifdef module_param_array_named +#undef module_param_array_named +#define module_param_array_named(name, array, type, nump, perm) \ + static struct kparam_array __param_arr_##name \ + = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ + sizeof(array[0]), array }; \ + module_param_call(name, param_array_set, param_array_get, \ + &__param_arr_##name, perm) +#endif /* module_param_array_named */ +/* + * num_online is broken for all < 2.6.10 kernels. This is needed to support + * Node module parameter of ixgbe. + */ +#undef num_online_nodes +#define num_online_nodes(n) 1 +extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); +#undef node_online_map +#define node_online_map _kcompat_node_online_map +#define pci_get_class pci_find_class +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) +#define PCI_D0 0 +#define PCI_D1 1 +#define PCI_D2 2 +#define PCI_D3hot 3 +#define PCI_D3cold 4 +typedef int pci_power_t; +#define pci_choose_state(pdev,state) state +#define PMSG_SUSPEND 3 +#define PCI_EXP_LNKCTL 16 + +#undef NETIF_F_LLTX + +#ifndef ARCH_HAS_PREFETCH +#define prefetch(X) +#endif + +#ifndef NET_IP_ALIGN +#define NET_IP_ALIGN 2 +#endif + +#define KC_USEC_PER_SEC 1000000L +#define usecs_to_jiffies _kc_usecs_to_jiffies +static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) +{ +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (KC_USEC_PER_SEC / HZ) * j; +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); +#else + return (j * KC_USEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return m * (HZ / KC_USEC_PER_SEC); +#else + return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; +#endif +} + +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTSTA 32 /* Root Status */ +#endif /* < 2.6.11 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) +#include +#define USE_REBOOT_NOTIFIER + +/* Generic MII registers. */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define MII_STAT1000 0x0a /* 1000BASE-T status */ +/* Advertisement control register. */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ +/* Link partner ability register. */ +#define LPA_PAUSE_CAP 0x0400 /* Can pause */ +#define LPA_PAUSE_ASYM 0x0800 /* Can pause asymetrically */ +/* 1000BASE-T Control register */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ +/* 1000BASE-T Status register */ +#define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */ +#define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */ + +#ifndef is_zero_ether_addr +#define is_zero_ether_addr _kc_is_zero_ether_addr +static inline int _kc_is_zero_ether_addr(const u8 *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} +#endif /* is_zero_ether_addr */ +#ifndef is_multicast_ether_addr +#define is_multicast_ether_addr _kc_is_multicast_ether_addr +static inline int _kc_is_multicast_ether_addr(const u8 *addr) +{ + return addr[0] & 0x01; +} +#endif /* is_multicast_ether_addr */ +#endif /* < 2.6.12 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +#ifndef kstrdup +#define kstrdup _kc_kstrdup +extern char *_kc_kstrdup(const char *s, unsigned int gfp); +#endif +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +#define pm_message_t u32 +#ifndef kzalloc +#define kzalloc _kc_kzalloc +extern void *_kc_kzalloc(size_t size, int flags); +#endif + +/* Generic MII registers. */ +#define MII_ESTATUS 0x0f /* Extended Status */ +/* Basic mode status register. */ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +/* Extended status register. */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + +#define SUPPORTED_Pause (1 << 13) +#define SUPPORTED_Asym_Pause (1 << 14) +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) +#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) +#define gfp_t unsigned +#else +typedef unsigned gfp_t; +#endif +#endif /* !RHEL4.3->RHEL5.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) +#ifdef CONFIG_X86_64 +#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir) \ + dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir)) +#define dma_sync_single_range_for_device(dev, addr, off, sz, dir) \ + dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir)) +#endif +#endif +#endif /* < 2.6.14 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) +#ifndef vmalloc_node +#define vmalloc_node(a,b) vmalloc(a) +#endif /* vmalloc_node*/ + +#define setup_timer(_timer, _function, _data) \ +do { \ + (_timer)->function = _function; \ + (_timer)->data = _data; \ + init_timer(_timer); \ +} while (0) +#ifndef device_can_wakeup +#define device_can_wakeup(dev) (1) +#endif +#ifndef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) do{}while(0) +#endif +#ifndef device_init_wakeup +#define device_init_wakeup(dev,val) do {} while (0) +#endif +static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} +#undef compare_ether_addr +#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) +#endif /* < 2.6.15 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) +#undef DEFINE_MUTEX +#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) +#define mutex_lock(x) down_interruptible(x) +#define mutex_unlock(x) up(x) + +#ifndef ____cacheline_internodealigned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp +#else +#define ____cacheline_internodealigned_in_smp +#endif /* CONFIG_SMP */ +#endif /* ____cacheline_internodealigned_in_smp */ +#undef HAVE_PCI_ERS +#else /* 2.6.16 and above */ +#undef HAVE_PCI_ERS +#define HAVE_PCI_ERS +#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) ) +#ifdef device_can_wakeup +#undef device_can_wakeup +#endif /* device_can_wakeup */ +#define device_can_wakeup(dev) 1 +#endif /* SLE_VERSION(10,4,0) */ +#endif /* < 2.6.16 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) +#ifndef dev_notice +#define dev_notice(dev, fmt, args...) \ + dev_printk(KERN_NOTICE, dev, fmt, ## args) +#endif + +#ifndef first_online_node +#define first_online_node 0 +#endif +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif +#endif /* < 2.6.17 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) + +#ifndef IRQ_HANDLED +#define irqreturn_t void +#define IRQ_HANDLED +#define IRQ_NONE +#endif + +#ifndef IRQF_PROBE_SHARED +#ifdef SA_PROBEIRQ +#define IRQF_PROBE_SHARED SA_PROBEIRQ +#else +#define IRQF_PROBE_SHARED 0 +#endif +#endif + +#ifndef IRQF_SHARED +#define IRQF_SHARED SA_SHIRQ +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef FIELD_SIZEOF +#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#endif + +#ifndef skb_is_gso +#ifdef NETIF_F_TSO +#define skb_is_gso _kc_skb_is_gso +static inline int _kc_skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_size; +} +#else +#define skb_is_gso(a) 0 +#endif +#endif + +#ifndef resource_size_t +#define resource_size_t unsigned long +#endif + +#ifdef skb_pad +#undef skb_pad +#endif +#define skb_pad(x,y) _kc_skb_pad(x, y) +int _kc_skb_pad(struct sk_buff *skb, int pad); +#ifdef skb_padto +#undef skb_padto +#endif +#define skb_padto(x,y) _kc_skb_padto(x, y) +static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return 0; + return _kc_skb_pad(skb, len - size); +} + +#ifndef DECLARE_PCI_UNMAP_ADDR +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + u32 LEN_NAME +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) +#endif /* DECLARE_PCI_UNMAP_ADDR */ +#endif /* < 2.6.18 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0))) +#define i_private u.generic_ip +#endif /* >= RHEL 5.0 */ + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif +#ifndef __ALIGN_MASK +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#endif +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) +#if (!((RHEL_RELEASE_CODE && \ + ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ + RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))))) +typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); +#endif +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#undef CONFIG_INET_LRO +#undef CONFIG_INET_LRO_MODULE +#ifdef IXGBE_FCOE +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +#endif /* IXGBE_FCOE */ +#endif +typedef irqreturn_t (*new_handler_t)(int, void*); +static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#else /* 2.4.x */ +typedef void (*irq_handler_t)(int, void*, struct pt_regs *); +typedef void (*new_handler_t)(int, void*); +static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#endif /* >= 2.5.x */ +{ + irq_handler_t new_handler = (irq_handler_t) handler; + return request_irq(irq, new_handler, flags, devname, dev_id); +} + +#undef request_irq +#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) + +#define irq_handler_t new_handler_t +/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +#define PCIE_CONFIG_SPACE_LEN 256 +#define PCI_CONFIG_SPACE_LEN 64 +#define PCIE_LINK_STATUS 0x12 +#define pci_config_space_ich8lan() do {} while(0) +#undef pci_save_state +extern int _kc_pci_save_state(struct pci_dev *); +#define pci_save_state(pdev) _kc_pci_save_state(pdev) +#undef pci_restore_state +extern void _kc_pci_restore_state(struct pci_dev *); +#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +#undef free_netdev +extern void _kc_free_netdev(struct net_device *); +#define free_netdev(netdev) _kc_free_netdev(netdev) +#endif +static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + return 0; +} +#define pci_disable_pcie_error_reporting(dev) do {} while (0) +#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) + +extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); +#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) +#ifndef bool +#define bool _Bool +#define true 1 +#define false 0 +#endif +#else /* 2.6.19 */ +#include +#include +#endif /* < 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) +#undef INIT_WORK +#define INIT_WORK(_work, _func) \ +do { \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->pending = 0; \ + (_work)->func = (void (*)(void *))_func; \ + (_work)->data = _work; \ + init_timer(&(_work)->timer); \ +} while (0) +#endif + +#ifndef PCI_VDEVICE +#define PCI_VDEVICE(ven, dev) \ + PCI_VENDOR_ID_##ven, (dev), \ + PCI_ANY_ID, PCI_ANY_ID, 0, 0 +#endif + +#ifndef PCI_VENDOR_ID_INTEL +#define PCI_VENDOR_ID_INTEL 0x8086 +#endif + +#ifndef round_jiffies +#define round_jiffies(x) x +#endif + +#define csum_offset csum + +#define HAVE_EARLY_VMALLOC_NODE +#define dev_to_node(dev) -1 +#undef set_dev_node +/* remove compiler warning with b=b, for unused variable */ +#define set_dev_node(a, b) do { (b) = (b); } while(0) + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} +#endif + +#else /* < 2.6.20 */ +#define HAVE_DEVICE_NUMA_NODE +#endif /* < 2.6.20 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define NETDEV_CLASS_DEV +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) +#define vlan_group_set_device(vg, id, dev) \ + do { \ + if (vg) vg->vlan_devices[id] = dev; \ + } while (0) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define pci_channel_offline(pdev) (pdev->error_state && \ + pdev->error_state != pci_channel_io_normal) +#define pci_request_selected_regions(pdev, bars, name) \ + pci_request_regions(pdev, name) +#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); + +#ifndef __aligned +#define __aligned(x) __attribute__((aligned(x))) +#endif + +extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev); +#define netdev_to_dev(netdev) \ + pci_dev_to_dev(_kc_netdev_to_pdev(netdev)) +#else +static inline struct device *netdev_to_dev(struct net_device *netdev) +{ + return &netdev->dev; +} + +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define tcp_hdr(skb) (skb->h.th) +#define tcp_hdrlen(skb) (skb->h.th->doff << 2) +#define skb_transport_offset(skb) (skb->h.raw - skb->data) +#define skb_transport_header(skb) (skb->h.raw) +#define ipv6_hdr(skb) (skb->nh.ipv6h) +#define ip_hdr(skb) (skb->nh.iph) +#define skb_network_offset(skb) (skb->nh.raw - skb->data) +#define skb_network_header(skb) (skb->nh.raw) +#define skb_tail_pointer(skb) skb->tail +#define skb_reset_tail_pointer(skb) \ + do { \ + skb->tail = skb->data; \ + } while (0) +#define skb_set_tail_pointer(skb, offset) \ + do { \ + skb->tail = skb->data + offset; \ + } while (0) +#define skb_copy_to_linear_data(skb, from, len) \ + memcpy(skb->data, from, len) +#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ + memcpy(skb->data + offset, from, len) +#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) +#define pci_register_driver pci_module_init +#define skb_mac_header(skb) skb->mac.raw + +#ifdef NETIF_F_MULTI_QUEUE +#ifndef alloc_etherdev_mq +#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) +#endif +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef ETH_FCS_LEN +#define ETH_FCS_LEN 4 +#endif +#define cancel_work_sync(x) flush_scheduled_work() +#ifndef udp_hdr +#define udp_hdr _udp_hdr +static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif + +#ifdef cpu_to_be16 +#undef cpu_to_be16 +#endif +#define cpu_to_be16(x) __constant_htons(x) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) +enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, + DUMP_PREFIX_OFFSET +}; +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ +#ifndef hex_asc +#define hex_asc(x) "0123456789abcdef"[x] +#endif +#include +extern void _kc_print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); +#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ + _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) +#ifndef ADVERTISED_2500baseX_Full +#define ADVERTISED_2500baseX_Full (1 << 15) +#endif +#ifndef SUPPORTED_2500baseX_Full +#define SUPPORTED_2500baseX_Full (1 << 15) +#endif + +#ifdef HAVE_I2C_SUPPORT +#include +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +struct i2c_board_info { + char driver_name[KOBJ_NAME_LEN]; + char type[I2C_NAME_SIZE]; + unsigned short flags; + unsigned short addr; + void *platform_data; +}; +#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\ + .addr = (dev_addr) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info) +extern struct i2c_client * +_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); +#endif /* HAVE_I2C_SUPPORT */ + +#else /* 2.6.22 */ +#define ETH_TYPE_TRANS_SETS_DEV +#define HAVE_NETDEV_STATS_IN_NETDEV +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) +#undef SET_MODULE_OWNER +#define SET_MODULE_OWNER(dev) do { } while (0) +#endif /* > 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +#define netif_subqueue_stopped(_a, _b) 0 +#ifndef PTR_ALIGN +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#endif + +#ifndef CONFIG_PM_SLEEP +#define CONFIG_PM_SLEEP CONFIG_PM +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) +#define HAVE_ETHTOOL_GET_PERM_ADDR +#endif /* 2.6.14 through 2.6.22 */ +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifndef ETH_FLAG_LRO +#define ETH_FLAG_LRO NETIF_F_LRO +#endif + +/* if GRO is supported then the napi struct must already exist */ +#ifndef NETIF_F_GRO +/* NAPI API changes in 2.6.24 break everything */ +struct napi_struct { + /* used to look up the real NAPI polling routine */ + int (*poll)(struct napi_struct *, int); + struct net_device *dev; + int weight; +}; +#endif + +#ifdef NAPI +extern int __kc_adapter_clean(struct net_device *, int *); +extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi); +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = (_napi); \ + struct net_device *poll_dev = napi_to_poll_dev(__napi); \ + poll_dev->poll = &(__kc_adapter_clean); \ + poll_dev->priv = (_napi); \ + poll_dev->weight = (_weight); \ + set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ + set_bit(__LINK_STATE_START, &poll_dev->state);\ + dev_hold(poll_dev); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_napi) \ + do { \ + struct net_device *poll_dev = napi_to_poll_dev(_napi); \ + WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ + dev_put(poll_dev); \ + memset(poll_dev, 0, sizeof(struct net_device));\ + } while (0) +#define napi_schedule_prep(_napi) \ + (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) +#define napi_schedule(_napi) \ + do { \ + if (napi_schedule_prep(_napi)) \ + __netif_rx_schedule(napi_to_poll_dev(_napi)); \ + } while (0) +#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) +#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) +#ifdef CONFIG_SMP +static inline void napi_synchronize(const struct napi_struct *n) +{ + struct net_device *dev = napi_to_poll_dev(n); + + while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { + /* No hurry. */ + msleep(1); + } +} +#else +#define napi_synchronize(n) barrier() +#endif /* CONFIG_SMP */ +#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) +#ifndef NETIF_F_GRO +#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) +#else +#define napi_complete(_napi) \ + do { \ + napi_gro_flush(_napi); \ + netif_rx_complete(napi_to_poll_dev(_napi)); \ + } while (0) +#endif /* NETIF_F_GRO */ +#else /* NAPI */ +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = _napi; \ + _netdev->poll = &(_poll); \ + _netdev->weight = (_weight); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_a) do {} while (0) +#endif /* NAPI */ + +#undef dev_get_by_name +#define dev_get_by_name(_a, _b) dev_get_by_name(_b) +#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) +#ifndef DMA_BIT_MASK +#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) +#endif + +#ifdef NETIF_F_TSO6 +#define skb_is_gso_v6 _kc_skb_is_gso_v6 +static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; +} +#endif /* NETIF_F_TSO6 */ + +#ifndef KERN_CONT +#define KERN_CONT "" +#endif +#ifndef pr_err +#define pr_err(fmt, arg...) \ + printk(KERN_ERR fmt, ##arg) +#endif +#else /* < 2.6.24 */ +#define HAVE_ETHTOOL_GET_SSET_COUNT +#define HAVE_NETDEV_NAPI_LIST +#endif /* < 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#include +#else /* >= 3.2.0 */ +#include +#endif /* else >= 3.2.0 */ +#endif /* > 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) +#define PM_QOS_CPU_DMA_LATENCY 1 + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) +#include +#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY +#define pm_qos_add_requirement(pm_qos_class, name, value) \ + set_acceptable_latency(name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) \ + remove_acceptable_latency(name) +#define pm_qos_update_requirement(pm_qos_class, name, value) \ + modify_acceptable_latency(name, value) +#else +#define PM_QOS_DEFAULT_VALUE -1 +#define pm_qos_add_requirement(pm_qos_class, name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) +#define pm_qos_update_requirement(pm_qos_class, name, value) { \ + if (value != PM_QOS_DEFAULT_VALUE) { \ + printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ + pci_name(adapter->pdev)); \ + } \ +} + +#endif /* > 2.6.18 */ + +#define pci_enable_device_mem(pdev) pci_enable_device(pdev) + +#ifndef DEFINE_PCI_DEVICE_TABLE +#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] +#endif /* DEFINE_PCI_DEVICE_TABLE */ + + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#ifndef IGB_PROCFS +#define IGB_PROCFS +#endif /* IGB_PROCFS */ +#endif /* >= 2.6.0 */ + +#else /* < 2.6.25 */ + + +#if IS_ENABLED(CONFIG_HWMON) +#ifndef IGB_HWMON +#define IGB_HWMON +#endif /* IGB_HWMON */ +#endif /* CONFIG_HWMON */ + +#endif /* < 2.6.25 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +#ifndef clamp_t +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min : __val; \ + __val > __max ? __max : __val; }) +#endif /* clamp_t */ +#undef kzalloc_node +#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) + +extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); +#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) +#else /* < 2.6.26 */ +#include +#define HAVE_NETDEV_VLAN_FEATURES +#ifndef PCI_EXP_LNKCAP_ASPMS +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#endif /* PCI_EXP_LNKCAP_ASPMS */ +#endif /* < 2.6.26 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + ep->speed = (__u16)speed; + /* ep->speed_hi = (__u16)(speed >> 16); */ +} +#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set + +static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + /* no speed_hi before 2.6.27, and probably no need for it yet */ + return (__u32)ep->speed; +} +#define ethtool_cmd_speed _kc_ethtool_cmd_speed + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) +#define ANCIENT_PM 1 +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ + defined(CONFIG_PM_SLEEP)) +#define NEWER_PM 1 +#endif +#if defined(ANCIENT_PM) || defined(NEWER_PM) +#undef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) \ + do { \ + u16 pmc = 0; \ + int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ + if (pm) { \ + pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ + &pmc); \ + } \ + (dev)->power.can_wakeup = !!(pmc >> 11); \ + (dev)->power.should_wakeup = (val && (pmc >> 11)); \ + } while (0) +#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ +#endif /* 2.6.15 through 2.6.27 */ +#ifndef netif_napi_del +#define netif_napi_del(_a) do {} while (0) +#ifdef NAPI +#ifdef CONFIG_NETPOLL +#undef netif_napi_del +#define netif_napi_del(_a) list_del(&(_a)->dev_list); +#endif +#endif +#endif /* netif_napi_del */ +#ifdef dma_mapping_error +#undef dma_mapping_error +#endif +#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#define HAVE_TX_MQ +#endif + +#ifdef HAVE_TX_MQ +extern void _kc_netif_tx_stop_all_queues(struct net_device *); +extern void _kc_netif_tx_wake_all_queues(struct net_device *); +extern void _kc_netif_tx_start_all_queues(struct net_device *); +#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) +#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) +#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) +#undef netif_stop_subqueue +#define netif_stop_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_stop_subqueue((_ndev), (_qi)); \ + else \ + netif_stop_queue((_ndev)); \ + } while (0) +#undef netif_start_subqueue +#define netif_start_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_start_subqueue((_ndev), (_qi)); \ + else \ + netif_start_queue((_ndev)); \ + } while (0) +#else /* HAVE_TX_MQ */ +#define netif_tx_stop_all_queues(a) netif_stop_queue(a) +#define netif_tx_wake_all_queues(a) netif_wake_queue(a) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) +#define netif_tx_start_all_queues(a) netif_start_queue(a) +#else +#define netif_tx_start_all_queues(a) do {} while (0) +#endif +#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev)) +#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) +#endif /* HAVE_TX_MQ */ +#ifndef NETIF_F_MULTI_QUEUE +#define NETIF_F_MULTI_QUEUE 0 +#define netif_is_multiqueue(a) 0 +#define netif_wake_subqueue(a, b) +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef __WARN_printf +extern void __kc_warn_slowpath(const char *file, const int line, + const char *fmt, ...) __attribute__((format(printf, 3, 4))); +#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) +#endif /* __WARN_printf */ + +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) +#endif /* WARN */ +#undef HAVE_IXGBE_DEBUG_FS +#undef HAVE_IGB_DEBUG_FS +#else /* < 2.6.27 */ +#define HAVE_TX_MQ +#define HAVE_NETDEV_SELECT_QUEUE +#ifdef CONFIG_DEBUG_FS +#define HAVE_IXGBE_DEBUG_FS +#define HAVE_IGB_DEBUG_FS +#endif /* CONFIG_DEBUG_FS */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) +#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ + pci_resource_len(pdev, bar)) +#define pci_wake_from_d3 _kc_pci_wake_from_d3 +#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep +extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); +extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); +#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) +#ifndef __skb_queue_head_init +static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} +#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) +#endif + +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ + +#endif /* < 2.6.28 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +#ifndef swap +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#endif +#define pci_request_selected_regions_exclusive(pdev, bars, name) \ + pci_request_selected_regions(pdev, bars, name) +#ifndef CONFIG_NR_CPUS +#define CONFIG_NR_CPUS 1 +#endif /* CONFIG_NR_CPUS */ +#ifndef pcie_aspm_enabled +#define pcie_aspm_enabled() (1) +#endif /* pcie_aspm_enabled */ + +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ + +#ifndef pci_clear_master +extern void _kc_pci_clear_master(struct pci_dev *dev); +#define pci_clear_master(dev) _kc_pci_clear_master(dev) +#endif + +#ifndef PCI_EXP_LNKCTL_ASPMC +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#endif +#else /* < 2.6.29 */ +#ifndef HAVE_NET_DEVICE_OPS +#define HAVE_NET_DEVICE_OPS +#endif +#ifdef CONFIG_DCB +#define HAVE_PFC_MODE_ENABLE +#endif /* CONFIG_DCB */ +#endif /* < 2.6.29 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#define skb_rx_queue_recorded(a) false +#define skb_get_rx_queue(a) 0 +#define skb_record_rx_queue(a, b) do {} while (0) +#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues) +#ifndef CONFIG_PCI_IOV +#undef pci_enable_sriov +#define pci_enable_sriov(a, b) -ENOTSUPP +#undef pci_disable_sriov +#define pci_disable_sriov(a) do {} while (0) +#endif /* CONFIG_PCI_IOV */ +#ifndef pr_cont +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) +#endif /* pr_cont */ +static inline void _kc_synchronize_irq(unsigned int a) +{ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + synchronize_irq(); +#else /* < 2.5.28 */ + synchronize_irq(a); +#endif /* < 2.5.28 */ +} +#undef synchronize_irq +#define synchronize_irq(a) _kc_synchronize_irq(a) + +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ + +#else /* < 2.6.30 */ +#define HAVE_ASPM_QUIRKS +#endif /* < 2.6.30 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) +#define ETH_P_1588 0x88F7 +#define ETH_P_FIP 0x8914 +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc_count) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(uclist, dev) \ + for (uclist = dev->uc_list; uclist; uclist = uclist->next) +#endif +#ifndef PORT_OTHER +#define PORT_OTHER 0xff +#endif +#ifndef MDIO_PHY_ID_PRTAD +#define MDIO_PHY_ID_PRTAD 0x03e0 +#endif +#ifndef MDIO_PHY_ID_DEVAD +#define MDIO_PHY_ID_DEVAD 0x001f +#endif +#ifndef skb_dst +#define skb_dst(s) ((s)->dst) +#endif + +#ifndef SUPPORTED_1000baseKX_Full +#define SUPPORTED_1000baseKX_Full (1 << 17) +#endif +#ifndef SUPPORTED_10000baseKX4_Full +#define SUPPORTED_10000baseKX4_Full (1 << 18) +#endif +#ifndef SUPPORTED_10000baseKR_Full +#define SUPPORTED_10000baseKR_Full (1 << 19) +#endif + +#ifndef ADVERTISED_1000baseKX_Full +#define ADVERTISED_1000baseKX_Full (1 << 17) +#endif +#ifndef ADVERTISED_10000baseKX4_Full +#define ADVERTISED_10000baseKX4_Full (1 << 18) +#endif +#ifndef ADVERTISED_10000baseKR_Full +#define ADVERTISED_10000baseKR_Full (1 << 19) +#endif + +#else /* < 2.6.31 */ +#ifndef HAVE_NETDEV_STORAGE_ADDRESS +#define HAVE_NETDEV_STORAGE_ADDRESS +#endif +#ifndef HAVE_NETDEV_HW_ADDR +#define HAVE_NETDEV_HW_ADDR +#endif +#ifndef HAVE_TRANS_START_IN_QUEUE +#define HAVE_TRANS_START_IN_QUEUE +#endif +#ifndef HAVE_INCLUDE_LINUX_MDIO_H +#define HAVE_INCLUDE_LINUX_MDIO_H +#endif +#endif /* < 2.6.31 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) +#undef netdev_tx_t +#define netdev_tx_t int +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef NETIF_F_FCOE_MTU +#define NETIF_F_FCOE_MTU (1 << 26) +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +static inline int _kc_pm_runtime_get_sync() +{ + return 1; +} +#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync() +#else /* 2.6.0 => 2.6.32 */ +static inline int _kc_pm_runtime_get_sync(struct device *dev) +{ + return 1; +} +#ifndef pm_runtime_get_sync +#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync(dev) +#endif +#endif /* 2.6.0 => 2.6.32 */ +#ifndef pm_runtime_put +#define pm_runtime_put(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_sync +#define pm_runtime_put_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_resume +#define pm_runtime_resume(dev) do {} while (0) +#endif +#ifndef pm_schedule_suspend +#define pm_schedule_suspend(dev, t) do {} while (0) +#endif +#ifndef pm_runtime_set_suspended +#define pm_runtime_set_suspended(dev) do {} while (0) +#endif +#ifndef pm_runtime_disable +#define pm_runtime_disable(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_noidle +#define pm_runtime_put_noidle(dev) do {} while (0) +#endif +#ifndef pm_runtime_set_active +#define pm_runtime_set_active(dev) do {} while (0) +#endif +#ifndef pm_runtime_enable +#define pm_runtime_enable(dev) do {} while (0) +#endif +#ifndef pm_runtime_get_noresume +#define pm_runtime_get_noresume(dev) do {} while (0) +#endif +#else /* < 2.6.32 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE +#define HAVE_NETDEV_OPS_FCOE_ENABLE +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_OPS_GETAPP +#define HAVE_DCBNL_OPS_GETAPP +#endif +#endif /* CONFIG_DCB */ +#include +/* IOV bad DMA target work arounds require at least this kernel rev support */ +#define HAVE_PCIE_TYPE +#endif /* < 2.6.32 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) +#ifndef pci_pcie_cap +#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) +#endif +#ifndef IPV4_FLOW +#define IPV4_FLOW 0x10 +#endif /* IPV4_FLOW */ +#ifndef IPV6_FLOW +#define IPV6_FLOW 0x11 +#endif /* IPV6_FLOW */ +/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */ +#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* RHEL6 or SLES11 SP1 */ +#ifndef __percpu +#define __percpu +#endif /* __percpu */ +#ifndef PORT_DA +#define PORT_DA PORT_OTHER +#endif +#ifndef PORT_NONE +#define PORT_NONE PORT_OTHER +#endif + +#if ((RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))) +#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE) +#undef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME +#undef DEFINE_DMA_UNMAP_LEN +#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME +#undef dma_unmap_addr +#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) +#undef dma_unmap_addr_set +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) +#undef dma_unmap_len +#define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) +#undef dma_unmap_len_set +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) +#endif /* CONFIG_X86_64 && !CONFIG_NEED_DMA_MAP_STATE */ +#endif /* RHEL_RELEASE_CODE */ + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \ + ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))))) +static inline bool pci_is_pcie(struct pci_dev *dev) +{ + return !!pci_pcie_cap(dev); +} +#endif /* RHEL_RELEASE_CODE */ + +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)))) +#define sk_tx_queue_get(_sk) (-1) +#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0) +#endif /* !(RHEL >= 6.2) */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#define HAVE_ETHTOOL_SET_PHYS_ID +#define HAVE_ETHTOOL_GET_TS_INFO +#endif /* RHEL >= 6.4 && RHEL < 7.0 */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB +#endif /* RHEL >= 6.5 && RHEL < 7.0 */ + +#else /* < 2.6.33 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* < 2.6.33 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#ifndef pci_num_vf +#define pci_num_vf(pdev) _kc_pci_num_vf(pdev) +extern int _kc_pci_num_vf(struct pci_dev *dev); +#endif +#endif /* RHEL_RELEASE_CODE */ + +#ifndef ETH_FLAG_NTUPLE +#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE +#endif + +#ifndef netdev_mc_count +#define netdev_mc_count(dev) ((dev)->mc_count) +#endif +#ifndef netdev_mc_empty +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) +#endif +#ifndef netdev_for_each_mc_addr +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) +#endif +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc.count) +#endif +#ifndef netdev_uc_empty +#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) +#endif +#ifndef dma_set_coherent_mask +#define dma_set_coherent_mask(dev,mask) \ + pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) +#endif +#ifndef pci_dev_run_wake +#define pci_dev_run_wake(pdev) (0) +#endif + +/* netdev logging taken from include/linux/netdevice.h */ +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (dev->reg_state != NETREG_REGISTERED) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#undef netdev_printk +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ + printk(level "%s: " format, pci_name(pdev), ##args); \ +} while(0) +#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ + struct device *dev = pci_dev_to_dev(pdev); \ + dev_printk(level, dev, "%s: " format, \ + netdev_name(netdev), ##args); \ +} while(0) +#else /* 2.6.21 => 2.6.34 */ +#define netdev_printk(level, netdev, format, args...) \ + dev_printk(level, (netdev)->dev.parent, \ + "%s: " format, \ + netdev_name(netdev), ##args) +#endif /* <2.6.0 <2.6.21 <2.6.34 */ +#undef netdev_emerg +#define netdev_emerg(dev, format, args...) \ + netdev_printk(KERN_EMERG, dev, format, ##args) +#undef netdev_alert +#define netdev_alert(dev, format, args...) \ + netdev_printk(KERN_ALERT, dev, format, ##args) +#undef netdev_crit +#define netdev_crit(dev, format, args...) \ + netdev_printk(KERN_CRIT, dev, format, ##args) +#undef netdev_err +#define netdev_err(dev, format, args...) \ + netdev_printk(KERN_ERR, dev, format, ##args) +#undef netdev_warn +#define netdev_warn(dev, format, args...) \ + netdev_printk(KERN_WARNING, dev, format, ##args) +#undef netdev_notice +#define netdev_notice(dev, format, args...) \ + netdev_printk(KERN_NOTICE, dev, format, ##args) +#undef netdev_info +#define netdev_info(dev, format, args...) \ + netdev_printk(KERN_INFO, dev, format, ##args) +#undef netdev_dbg +#if defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#elif defined(CONFIG_DYNAMIC_DEBUG) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ + netdev_name(__dev), ##args); \ +} while (0) +#else /* DEBUG */ +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ + 0; \ +}) +#endif /* DEBUG */ + +#undef netif_printk +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#undef netif_emerg +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#undef netif_alert +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#undef netif_crit +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#undef netif_err +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#undef netif_warn +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#undef netif_notice +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#undef netif_info +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) +#undef netif_dbg +#define netif_dbg(priv, type, dev, fmt, args...) \ + netif_level(dbg, priv, type, dev, fmt, ##args) + +#ifdef SET_SYSTEM_SLEEP_PM_OPS +#define HAVE_SYSTEM_SLEEP_PM_OPS +#endif + +#ifndef for_each_set_bit +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) +#endif /* for_each_set_bit */ + +#ifndef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN +#define dma_unmap_addr pci_unmap_addr +#define dma_unmap_addr_set pci_unmap_addr_set +#define dma_unmap_len pci_unmap_len +#define dma_unmap_len_set pci_unmap_len_set +#endif /* DEFINE_DMA_UNMAP_ADDR */ + +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3)) +#ifdef IGB_HWMON +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define sysfs_attr_init(attr) \ + do { \ + static struct lock_class_key __key; \ + (attr)->key = &__key; \ + } while (0) +#else +#define sysfs_attr_init(attr) do {} while (0) +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* IGB_HWMON */ +#endif /* RHEL_RELEASE_CODE */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +static inline bool _kc_pm_runtime_suspended() +{ + return false; +} +#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended() +#else /* 2.6.0 => 2.6.34 */ +static inline bool _kc_pm_runtime_suspended(struct device *dev) +{ + return false; +} +#ifndef pm_runtime_suspended +#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended(dev) +#endif +#endif /* 2.6.0 => 2.6.34 */ + +#else /* < 2.6.34 */ +#define HAVE_SYSTEM_SLEEP_PM_OPS +#ifndef HAVE_SET_RX_MODE +#define HAVE_SET_RX_MODE +#endif + +#endif /* < 2.6.34 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) + +ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, + const void __user *from, size_t count); +#define simple_write_to_buffer _kc_simple_write_to_buffer + +#ifndef numa_node_id +#define numa_node_id() 0 +#endif +#ifdef HAVE_TX_MQ +#include +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) +void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); +#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues +#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ +#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#define netif_set_real_num_tx_queues(_netdev, _count) \ + do { \ + (_netdev)->egress_subqueue_count = _count; \ + } while (0) +#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#else /* HAVE_TX_MQ */ +#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) +#endif /* HAVE_TX_MQ */ +#ifndef ETH_FLAG_RXHASH +#define ETH_FLAG_RXHASH (1<<28) +#endif /* ETH_FLAG_RXHASH */ +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) +#define HAVE_IRQ_AFFINITY_HINT +#endif +#else /* < 2.6.35 */ +#define HAVE_PM_QOS_REQUEST_LIST +#define HAVE_IRQ_AFFINITY_HINT +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); +#define ethtool_op_set_flags _kc_ethtool_op_set_flags +extern u32 _kc_ethtool_op_get_flags(struct net_device *); +#define ethtool_op_get_flags _kc_ethtool_op_get_flags + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#ifdef NET_IP_ALIGN +#undef NET_IP_ALIGN +#endif +#define NET_IP_ALIGN 0 +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#ifdef NET_SKB_PAD +#undef NET_SKB_PAD +#endif + +#if (L1_CACHE_BYTES > 32) +#define NET_SKB_PAD L1_CACHE_BYTES +#else +#define NET_SKB_PAD 32 +#endif + +static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); + if (skb) { +#if (NET_IP_ALIGN + NET_SKB_PAD) + skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); +#endif + skb->dev = dev; + } + return skb; +} + +#ifdef netdev_alloc_skb_ip_align +#undef netdev_alloc_skb_ip_align +#endif +#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) + +#undef netif_level +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#undef usleep_range +#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) + +#define u64_stats_update_begin(a) do { } while(0) +#define u64_stats_update_end(a) do { } while(0) +#define u64_stats_fetch_begin(a) do { } while(0) +#define u64_stats_fetch_retry_bh(a) (0) +#define u64_stats_fetch_begin_bh(a) (0) + +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) +#define HAVE_8021P_SUPPORT +#endif + +#else /* < 2.6.36 */ + + +#define HAVE_PM_QOS_REQUEST_ACTIVE +#define HAVE_8021P_SUPPORT +#define HAVE_NDO_GET_STATS64 +#endif /* < 2.6.36 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) +#ifndef netif_set_real_num_rx_queues +static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq) +{ + return 0; +} +#define netif_set_real_num_rx_queues(dev, rxq) \ + __kc_netif_set_real_num_rx_queues((dev), (rxq)) +#endif +#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) +#endif +#ifndef VLAN_N_VID +#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN +#endif /* VLAN_N_VID */ +#ifndef ETH_FLAG_TXVLAN +#define ETH_FLAG_TXVLAN (1 << 7) +#endif /* ETH_FLAG_TXVLAN */ +#ifndef ETH_FLAG_RXVLAN +#define ETH_FLAG_RXVLAN (1 << 8) +#endif /* ETH_FLAG_RXVLAN */ + +static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) +{ + WARN_ON(skb->ip_summed != CHECKSUM_NONE); +} +#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) + +static inline void *_kc_vzalloc_node(unsigned long size, int node) +{ + void *addr = vmalloc_node(size, node); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) + +static inline void *_kc_vzalloc(unsigned long size) +{ + void *addr = vmalloc(size); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc(_size) _kc_vzalloc(_size) + +#ifndef vlan_get_protocol +static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb) || + skb->protocol != cpu_to_be16(ETH_P_8021Q)) + return skb->protocol; + + if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) + return 0; + + return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; +} +#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) +#endif +#ifdef HAVE_HW_TIME_STAMP +#define SKBTX_HW_TSTAMP (1 << 0) +#define SKBTX_IN_PROGRESS (1 << 2) +#define SKB_SHARED_TX_IS_UNION +#endif + +#ifndef device_wakeup_enable +#define device_wakeup_enable(dev) device_set_wakeup_enable(dev, true) +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) +#ifndef HAVE_VLAN_RX_REGISTER +#define HAVE_VLAN_RX_REGISTER +#endif +#endif /* > 2.4.18 */ +#endif /* < 2.6.37 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define skb_checksum_start_offset(skb) skb_transport_offset(skb) +#else /* 2.6.22 -> 2.6.37 */ +static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} +#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) +#endif /* 2.6.22 -> 2.6.37 */ +#ifdef CONFIG_DCB +#ifndef IEEE_8021QAZ_MAX_TCS +#define IEEE_8021QAZ_MAX_TCS 8 +#endif +#ifndef DCB_CAP_DCBX_HOST +#define DCB_CAP_DCBX_HOST 0x01 +#endif +#ifndef DCB_CAP_DCBX_LLD_MANAGED +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#endif +#ifndef DCB_CAP_DCBX_VER_CEE +#define DCB_CAP_DCBX_VER_CEE 0x04 +#endif +#ifndef DCB_CAP_DCBX_VER_IEEE +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#endif +#ifndef DCB_CAP_DCBX_STATIC +#define DCB_CAP_DCBX_STATIC 0x10 +#endif +#endif /* CONFIG_DCB */ +#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) +#define CONFIG_XPS +#endif /* RHEL_RELEASE_VERSION(6,2) */ +#endif /* < 2.6.38 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#ifndef NETIF_F_RXCSUM +#define NETIF_F_RXCSUM (1 << 29) +#endif +#ifndef skb_queue_reverse_walk_safe +#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->prev, tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) +#endif +#else /* < 2.6.39 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif +#ifndef HAVE_SETUP_TC +#define HAVE_SETUP_TC +#endif +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#endif +#endif /* CONFIG_DCB */ +#ifndef HAVE_NDO_SET_FEATURES +#define HAVE_NDO_SET_FEATURES +#endif +#endif /* < 2.6.39 */ + +/*****************************************************************************/ +/* use < 2.6.40 because of a Fedora 15 kernel update where they + * updated the kernel version to 2.6.40.x and they back-ported 3.0 features + * like set_phys_id for ethtool. + */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) +#ifdef ETHTOOL_GRXRINGS +#ifndef FLOW_EXT +#define FLOW_EXT 0x80000000 +union _kc_ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + __u8 hdata[60]; +}; +struct _kc_ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; +struct _kc_ethtool_rx_flow_spec { + __u32 flow_type; + union _kc_ethtool_flow_union h_u; + struct _kc_ethtool_flow_ext h_ext; + union _kc_ethtool_flow_union m_u; + struct _kc_ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; +#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec +#endif /* FLOW_EXT */ +#endif + +#define pci_disable_link_state_locked pci_disable_link_state + +#ifndef PCI_LTR_VALUE_MASK +#define PCI_LTR_VALUE_MASK 0x000003ff +#endif +#ifndef PCI_LTR_SCALE_MASK +#define PCI_LTR_SCALE_MASK 0x00001c00 +#endif +#ifndef PCI_LTR_SCALE_SHIFT +#define PCI_LTR_SCALE_SHIFT 10 +#endif + +#else /* < 2.6.40 */ +#define HAVE_ETHTOOL_SET_PHYS_ID +#endif /* < 2.6.40 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) ) +#define USE_LEGACY_PM_SUPPORT +#endif /* < 3.0.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#ifndef __netdev_alloc_skb_ip_align +#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) +#endif /* __netdev_alloc_skb_ip_align */ +#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) +#define dcb_ieee_delapp(dev, app) 0 +#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) + +/* 1000BASE-T Control register */ +#define CTL1000_AS_MASTER 0x0800 +#define CTL1000_ENABLE_MASTER 0x1000 + +#else /* < 3.1.0 */ +#ifndef HAVE_DCBNL_IEEE_DELAPP +#define HAVE_DCBNL_IEEE_DELAPP +#endif +#endif /* < 3.1.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#ifdef ETHTOOL_GRXRINGS +#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS +#endif /* ETHTOOL_GRXRINGS */ + +#ifndef skb_frag_size +#define skb_frag_size(frag) _kc_skb_frag_size(frag) +static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} +#endif /* skb_frag_size */ + +#ifndef skb_frag_size_sub +#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) +static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} +#endif /* skb_frag_size_sub */ + +#ifndef skb_frag_page +#define skb_frag_page(frag) _kc_skb_frag_page(frag) +static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) +{ + return frag->page; +} +#endif /* skb_frag_page */ + +#ifndef skb_frag_address +#define skb_frag_address(frag) _kc_skb_frag_address(frag) +static inline void *_kc_skb_frag_address(const skb_frag_t *frag) +{ + return page_address(skb_frag_page(frag)) + frag->page_offset; +} +#endif /* skb_frag_address */ + +#ifndef skb_frag_dma_map +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#include +#endif +#define skb_frag_dma_map(dev,frag,offset,size,dir) \ + _kc_skb_frag_dma_map(dev,frag,offset,size,dir) +static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + return dma_map_page(dev, skb_frag_page(frag), + frag->page_offset + offset, size, dir); +} +#endif /* skb_frag_dma_map */ + +#ifndef __skb_frag_unref +#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) +static inline void __kc_skb_frag_unref(skb_frag_t *frag) +{ + put_page(skb_frag_page(frag)); +} +#endif /* __skb_frag_unref */ + +#ifndef SPEED_UNKNOWN +#define SPEED_UNKNOWN -1 +#endif +#ifndef DUPLEX_UNKNOWN +#define DUPLEX_UNKNOWN 0xff +#endif +#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#endif +#endif +#else /* < 3.2.0 */ +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_VF_SPOOFCHK_CONFIGURE +#endif +#endif /* < 3.2.0 */ + +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2)) +#undef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) +#endif +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) +typedef u32 kni_netdev_features_t; +#undef PCI_EXP_TYPE_RC_EC +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#ifndef CONFIG_BQL +#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0) +#define netdev_completed_queue(_n, _p, _b) do {} while (0) +#define netdev_tx_sent_queue(_q, _b) do {} while (0) +#define netdev_sent_queue(_n, _b) do {} while (0) +#define netdev_tx_reset_queue(_q) do {} while (0) +#define netdev_reset_queue(_n) do {} while (0) +#endif +#else /* ! < 3.3.0 */ +typedef netdev_features_t kni_netdev_features_t; +#define HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef ETHTOOL_SRXNTUPLE +#undef ETHTOOL_SRXNTUPLE +#endif +#endif /* < 3.3.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +#ifndef NETIF_F_RXFCS +#define NETIF_F_RXFCS 0 +#endif /* NETIF_F_RXFCS */ +#ifndef NETIF_F_RXALL +#define NETIF_F_RXALL 0 +#endif /* NETIF_F_RXALL */ + +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) +#define NUMTCS_RETURNS_U8 + +int _kc_simple_open(struct inode *inode, struct file *file); +#define simple_open _kc_simple_open +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ + + +#ifndef skb_add_rx_frag +#define skb_add_rx_frag _kc_skb_add_rx_frag +extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, + int, int, unsigned int); +#endif +#ifdef NET_ADDR_RANDOM +#define eth_hw_addr_random(N) do { \ + random_ether_addr(N->dev_addr); \ + N->addr_assign_type |= NET_ADDR_RANDOM; \ + } while (0) +#else /* NET_ADDR_RANDOM */ +#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr) +#endif /* NET_ADDR_RANDOM */ +#else /* < 3.4.0 */ +#include +#endif /* >= 3.4.0 */ + +/*****************************************************************************/ +#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK) +#define HAVE_PTP_1588_CLOCK +#else +#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel +#endif /* > 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) +#define skb_tx_timestamp(skb) do {} while (0) +static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) +{ + return !compare_ether_addr(addr1, addr2); +} +#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) +#else +#define HAVE_FDB_OPS +#define HAVE_ETHTOOL_GET_TS_INFO +#endif /* < 3.5.0 */ + +/*****************************************************************************/ +#include +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) ) +#define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */ + +#ifndef MDIO_EEE_100TX +#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */ +#endif +#ifndef MDIO_EEE_1000T +#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */ +#endif +#ifndef MDIO_EEE_10GT +#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */ +#endif +#ifndef MDIO_EEE_1000KX +#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */ +#endif +#ifndef MDIO_EEE_10GKX4 +#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ +#endif +#ifndef MDIO_EEE_10GKR +#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ +#endif +#endif /* < 3.6.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) +#ifndef ADVERTISED_40000baseKR4_Full +/* these defines were all added in one commit, so should be safe + * to trigger activiation on one define + */ +#define SUPPORTED_40000baseKR4_Full (1 << 23) +#define SUPPORTED_40000baseCR4_Full (1 << 24) +#define SUPPORTED_40000baseSR4_Full (1 << 25) +#define SUPPORTED_40000baseLR4_Full (1 << 26) +#define ADVERTISED_40000baseKR4_Full (1 << 23) +#define ADVERTISED_40000baseCR4_Full (1 << 24) +#define ADVERTISED_40000baseSR4_Full (1 << 25) +#define ADVERTISED_40000baseLR4_Full (1 << 26) +#endif + +/** + * mmd_eee_cap_to_ethtool_sup_t + * @eee_cap: value of the MMD EEE Capability register + * + * A small helper function that translates MMD EEE Capability (3.20) bits + * to ethtool supported settings. + */ +static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap) +{ + u32 supported = 0; + + if (eee_cap & MDIO_EEE_100TX) + supported |= SUPPORTED_100baseT_Full; + if (eee_cap & MDIO_EEE_1000T) + supported |= SUPPORTED_1000baseT_Full; + if (eee_cap & MDIO_EEE_10GT) + supported |= SUPPORTED_10000baseT_Full; + if (eee_cap & MDIO_EEE_1000KX) + supported |= SUPPORTED_1000baseKX_Full; + if (eee_cap & MDIO_EEE_10GKX4) + supported |= SUPPORTED_10000baseKX4_Full; + if (eee_cap & MDIO_EEE_10GKR) + supported |= SUPPORTED_10000baseKR_Full; + + return supported; +} +#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \ + __kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap) + +/** + * mmd_eee_adv_to_ethtool_adv_t + * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers + * + * A small helper function that translates the MMD EEE Advertisement (7.60) + * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement + * settings. + */ +static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv) +{ + u32 adv = 0; + + if (eee_adv & MDIO_EEE_100TX) + adv |= ADVERTISED_100baseT_Full; + if (eee_adv & MDIO_EEE_1000T) + adv |= ADVERTISED_1000baseT_Full; + if (eee_adv & MDIO_EEE_10GT) + adv |= ADVERTISED_10000baseT_Full; + if (eee_adv & MDIO_EEE_1000KX) + adv |= ADVERTISED_1000baseKX_Full; + if (eee_adv & MDIO_EEE_10GKX4) + adv |= ADVERTISED_10000baseKX4_Full; + if (eee_adv & MDIO_EEE_10GKR) + adv |= ADVERTISED_10000baseKR_Full; + + return adv; +} +#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \ + __kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv) + +/** + * ethtool_adv_to_mmd_eee_adv_t + * @adv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement settings + * to EEE advertisements for the MMD EEE Advertisement (7.60) and + * MMD EEE Link Partner Ability (7.61) registers. + */ +static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv) +{ + u16 reg = 0; + + if (adv & ADVERTISED_100baseT_Full) + reg |= MDIO_EEE_100TX; + if (adv & ADVERTISED_1000baseT_Full) + reg |= MDIO_EEE_1000T; + if (adv & ADVERTISED_10000baseT_Full) + reg |= MDIO_EEE_10GT; + if (adv & ADVERTISED_1000baseKX_Full) + reg |= MDIO_EEE_1000KX; + if (adv & ADVERTISED_10000baseKX4_Full) + reg |= MDIO_EEE_10GKX4; + if (adv & ADVERTISED_10000baseKR_Full) + reg |= MDIO_EEE_10GKR; + + return reg; +} +#define ethtool_adv_to_mmd_eee_adv_t(adv) \ + __kc_ethtool_adv_to_mmd_eee_adv_t(adv) + +#ifndef pci_pcie_type +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +static inline u8 pci_pcie_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + BUG(); + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} +#else /* < 2.6.24 */ +#define pci_pcie_type(x) (x)->pcie_type +#endif /* < 2.6.24 */ +#endif /* pci_pcie_type */ + +#define ptp_clock_register(caps, args...) ptp_clock_register(caps) + +#ifndef PCI_EXP_LNKSTA2 +int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); +#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v) +int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val); +#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v) +int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, + u16 clear, u16 set); +#define pcie_capability_clear_and_set_word(d,p,c,s) \ + __kc_pcie_capability_clear_and_set_word(d,p,c,s) + +#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ + +static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos, + u16 clear) +{ + return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0); +} +#endif /* !PCI_EXP_LNKSTA2 */ + +#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) +#define USE_CONST_DEV_UC_CHAR +#endif + +#else /* >= 3.7.0 */ +#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS +#define USE_CONST_DEV_UC_CHAR +#endif /* >= 3.7.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) ) +#ifndef PCI_EXP_LNKCTL_ASPM_L0S +#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */ +#endif +#ifndef PCI_EXP_LNKCTL_ASPM_L1 +#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */ +#endif +#define HAVE_CONFIG_HOTPLUG +/* Reserved Ethernet Addresses per IEEE 802.1Q */ +static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { + 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) +static inline bool is_link_local_ether_addr(const u8 *addr) +{ + __be16 *a = (__be16 *)addr; + static const __be16 *b = (const __be16 *)eth_reserved_addr_base; + static const __be16 m = cpu_to_be16(0xfff0); + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; +} +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ +#else /* >= 3.8.0 */ +#ifndef __devinit +#define __devinit +#define HAVE_ENCAP_CSUM_OFFLOAD +#endif + +#ifndef __devinitdata +#define __devinitdata +#endif + +#ifndef __devexit +#define __devexit +#endif + +#ifndef __devexit_p +#define __devexit_p +#endif + +#ifndef HAVE_SRIOV_CONFIGURE +#define HAVE_SRIOV_CONFIGURE +#endif + +#define HAVE_BRIDGE_ATTRIBS +#ifndef BRIDGE_MODE_VEB +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#endif /* BRIDGE_MODE_VEB */ +#ifndef BRIDGE_MODE_VEPA +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ +#endif /* BRIDGE_MODE_VEPA */ +#endif /* >= 3.8.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) + +#undef hlist_entry +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#undef hlist_entry_safe +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + +#undef hlist_for_each_entry +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +#undef hlist_for_each_entry_safe +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) + +#ifdef CONFIG_XPS +extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16); +#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx)) +#else /* CONFIG_XPS */ +#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0) +#endif /* CONFIG_XPS */ + +#ifdef HAVE_NETDEV_SELECT_QUEUE +#define _kc_hashrnd 0xd631614b /* not so random hash salt */ +extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); +#define __netdev_pick_tx __kc_netdev_pick_tx +#endif /* HAVE_NETDEV_SELECT_QUEUE */ +#else +#define HAVE_BRIDGE_FILTER +#define USE_DEFAULT_FDB_DEL_DUMP +#endif /* < 3.9.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +extern int __kc_pci_vfs_assigned(struct pci_dev *dev); +#else +static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + return 0; +} +#endif +#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) + +#ifndef VLAN_TX_COOKIE_MAGIC +static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, + u16 vlan_tci) +{ +#ifdef VLAN_TAG_PRESENT + vlan_tci |= VLAN_TAG_PRESENT; +#endif + skb->vlan_tci = vlan_tci; + return skb; +} +#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \ + __kc__vlan_hwaccel_put_tag(skb, vlan_tci) +#endif + +#else /* >= 3.10.0 */ +#define HAVE_ENCAP_TSO_OFFLOAD +#endif /* >= 3.10.0 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6))) +#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \ + && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \ + || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4)))) +#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0))) +#ifdef NETIF_F_RXHASH +#define PKT_HASH_TYPE_L3 0 +static inline void +skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) +{ + skb->rxhash = hash; +} +#endif /* NETIF_F_RXHASH */ +#endif /* < SLES12 */ +#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */ +#endif /* < RHEL7 */ +#endif /* < 3.14.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) +#define HAVE_VF_MIN_MAX_TXRATE 1 +#endif /* >= 3.16.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK +#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_FDB_ADD_VID +#endif /* !RHEL 7.2 */ +#endif /* >= 3.19.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +/* vlan_tx_xx functions got renamed to skb_vlan */ +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS +#endif /* !RHEL 7.2 */ +#endif /* 4.0.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) +/* ndo_bridge_getlink adds new nlflags parameter */ +#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +#endif /* >= 4.1.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) +/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ +#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL +#endif /* >= 4.2.0 */ +#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c new file mode 100644 index 00000000..e1a89388 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c @@ -0,0 +1,1171 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* + * net/core/ethtool.c - Ethtool ioctl handler + * Copyright (c) 2003 Matthew Wilcox + * + * This file is where we call all the ethtool_ops commands to get + * the information ethtool needs. We fall back to calling do_ioctl() + * for drivers which haven't been converted to ethtool_ops yet. + * + * It's GPL, stupid. + * + * Modification by sfeldma@pobox.com to work as backward compat + * solution for pre-ethtool_ops kernels. + * - copied struct ethtool_ops from ethtool.h + * - defined SET_ETHTOOL_OPS + * - put in some #ifndef NETIF_F_xxx wrappers + * - changes refs to dev->ethtool_ops to ethtool_ops + * - changed dev_ethtool to ethtool_ioctl + * - remove EXPORT_SYMBOL()s + * - added _kc_ prefix in built-in ethtool_op_xxx ops. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kcompat.h" + +#undef SUPPORTED_10000baseT_Full +#define SUPPORTED_10000baseT_Full (1 << 12) +#undef ADVERTISED_10000baseT_Full +#define ADVERTISED_10000baseT_Full (1 << 12) +#undef SPEED_10000 +#define SPEED_10000 10000 + +#undef ethtool_ops +#define ethtool_ops _kc_ethtool_ops + +struct _kc_ethtool_ops { + int (*get_settings)(struct net_device *, struct ethtool_cmd *); + int (*set_settings)(struct net_device *, struct ethtool_cmd *); + void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); + int (*get_regs_len)(struct net_device *); + void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); + void (*get_wol)(struct net_device *, struct ethtool_wolinfo *); + int (*set_wol)(struct net_device *, struct ethtool_wolinfo *); + u32 (*get_msglevel)(struct net_device *); + void (*set_msglevel)(struct net_device *, u32); + int (*nway_reset)(struct net_device *); + u32 (*get_link)(struct net_device *); + int (*get_eeprom_len)(struct net_device *); + int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); + int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); + int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *); + int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *); + void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *); + int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); + void (*get_pauseparam)(struct net_device *, + struct ethtool_pauseparam*); + int (*set_pauseparam)(struct net_device *, + struct ethtool_pauseparam*); + u32 (*get_rx_csum)(struct net_device *); + int (*set_rx_csum)(struct net_device *, u32); + u32 (*get_tx_csum)(struct net_device *); + int (*set_tx_csum)(struct net_device *, u32); + u32 (*get_sg)(struct net_device *); + int (*set_sg)(struct net_device *, u32); + u32 (*get_tso)(struct net_device *); + int (*set_tso)(struct net_device *, u32); + int (*self_test_count)(struct net_device *); + void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); + void (*get_strings)(struct net_device *, u32 stringset, u8 *); + int (*phys_id)(struct net_device *, u32); + int (*get_stats_count)(struct net_device *); + void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, + u64 *); +} *ethtool_ops = NULL; + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops)) + +/* + * Some useful ethtool_ops methods that are device independent. If we find that + * all drivers want to do the same thing here, we can turn these into dev_() + * function calls. + */ + +#undef ethtool_op_get_link +#define ethtool_op_get_link _kc_ethtool_op_get_link +u32 _kc_ethtool_op_get_link(struct net_device *dev) +{ + return netif_carrier_ok(dev) ? 1 : 0; +} + +#undef ethtool_op_get_tx_csum +#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum +u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev) +{ +#ifdef NETIF_F_IP_CSUM + return (dev->features & NETIF_F_IP_CSUM) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_tx_csum +#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum +int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_IP_CSUM + if (data) +#ifdef NETIF_F_IPV6_CSUM + dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); +#else + dev->features |= NETIF_F_IP_CSUM; + else + dev->features &= ~NETIF_F_IP_CSUM; +#endif +#endif + + return 0; +} + +#undef ethtool_op_get_sg +#define ethtool_op_get_sg _kc_ethtool_op_get_sg +u32 _kc_ethtool_op_get_sg(struct net_device *dev) +{ +#ifdef NETIF_F_SG + return (dev->features & NETIF_F_SG) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_sg +#define ethtool_op_set_sg _kc_ethtool_op_set_sg +int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_SG + if (data) + dev->features |= NETIF_F_SG; + else + dev->features &= ~NETIF_F_SG; +#endif + + return 0; +} + +#undef ethtool_op_get_tso +#define ethtool_op_get_tso _kc_ethtool_op_get_tso +u32 _kc_ethtool_op_get_tso(struct net_device *dev) +{ +#ifdef NETIF_F_TSO + return (dev->features & NETIF_F_TSO) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_tso +#define ethtool_op_set_tso _kc_ethtool_op_set_tso +int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_TSO + if (data) + dev->features |= NETIF_F_TSO; + else + dev->features &= ~NETIF_F_TSO; +#endif + + return 0; +} + +/* Handlers for each ethtool command */ + +static int ethtool_get_settings(struct net_device *dev, void *useraddr) +{ + struct ethtool_cmd cmd = { ETHTOOL_GSET }; + int err; + + if (!ethtool_ops->get_settings) + return -EOPNOTSUPP; + + err = ethtool_ops->get_settings(dev, &cmd); + if (err < 0) + return err; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + return 0; +} + +static int ethtool_set_settings(struct net_device *dev, void *useraddr) +{ + struct ethtool_cmd cmd; + + if (!ethtool_ops->set_settings) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return ethtool_ops->set_settings(dev, &cmd); +} + +static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) +{ + struct ethtool_drvinfo info; + struct ethtool_ops *ops = ethtool_ops; + + if (!ops->get_drvinfo) + return -EOPNOTSUPP; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GDRVINFO; + ops->get_drvinfo(dev, &info); + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + if (ops->get_regs_len) + info.regdump_len = ops->get_regs_len(dev); + if (ops->get_eeprom_len) + info.eedump_len = ops->get_eeprom_len(dev); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +static int ethtool_get_regs(struct net_device *dev, char *useraddr) +{ + struct ethtool_regs regs; + struct ethtool_ops *ops = ethtool_ops; + void *regbuf; + int reglen, ret; + + if (!ops->get_regs || !ops->get_regs_len) + return -EOPNOTSUPP; + + if (copy_from_user(®s, useraddr, sizeof(regs))) + return -EFAULT; + + reglen = ops->get_regs_len(dev); + if (regs.len > reglen) + regs.len = reglen; + + regbuf = kmalloc(reglen, GFP_USER); + if (!regbuf) + return -ENOMEM; + + ops->get_regs(dev, ®s, regbuf); + + ret = -EFAULT; + if (copy_to_user(useraddr, ®s, sizeof(regs))) + goto out; + useraddr += offsetof(struct ethtool_regs, data); + if (copy_to_user(useraddr, regbuf, reglen)) + goto out; + ret = 0; + +out: + kfree(regbuf); + return ret; +} + +static int ethtool_get_wol(struct net_device *dev, char *useraddr) +{ + struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; + + if (!ethtool_ops->get_wol) + return -EOPNOTSUPP; + + ethtool_ops->get_wol(dev, &wol); + + if (copy_to_user(useraddr, &wol, sizeof(wol))) + return -EFAULT; + return 0; +} + +static int ethtool_set_wol(struct net_device *dev, char *useraddr) +{ + struct ethtool_wolinfo wol; + + if (!ethtool_ops->set_wol) + return -EOPNOTSUPP; + + if (copy_from_user(&wol, useraddr, sizeof(wol))) + return -EFAULT; + + return ethtool_ops->set_wol(dev, &wol); +} + +static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GMSGLVL }; + + if (!ethtool_ops->get_msglevel) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_msglevel(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_msglevel) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + ethtool_ops->set_msglevel(dev, edata.data); + return 0; +} + +static int ethtool_nway_reset(struct net_device *dev) +{ + if (!ethtool_ops->nway_reset) + return -EOPNOTSUPP; + + return ethtool_ops->nway_reset(dev); +} + +static int ethtool_get_link(struct net_device *dev, void *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GLINK }; + + if (!ethtool_ops->get_link) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_link(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_get_eeprom(struct net_device *dev, void *useraddr) +{ + struct ethtool_eeprom eeprom; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->get_eeprom || !ops->get_eeprom_len) + return -EOPNOTSUPP; + + if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) + return -EFAULT; + + /* Check for wrap and zero */ + if (eeprom.offset + eeprom.len <= eeprom.offset) + return -EINVAL; + + /* Check for exceeding total eeprom len */ + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + return -EINVAL; + + data = kmalloc(eeprom.len, GFP_USER); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) + goto out; + + ret = ops->get_eeprom(dev, &eeprom, data); + if (ret) + goto out; + + ret = -EFAULT; + if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) + goto out; + if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) +{ + struct ethtool_eeprom eeprom; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->set_eeprom || !ops->get_eeprom_len) + return -EOPNOTSUPP; + + if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) + return -EFAULT; + + /* Check for wrap and zero */ + if (eeprom.offset + eeprom.len <= eeprom.offset) + return -EINVAL; + + /* Check for exceeding total eeprom len */ + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + return -EINVAL; + + data = kmalloc(eeprom.len, GFP_USER); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) + goto out; + + ret = ops->set_eeprom(dev, &eeprom, data); + if (ret) + goto out; + + if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) + ret = -EFAULT; + +out: + kfree(data); + return ret; +} + +static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) +{ + struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; + + if (!ethtool_ops->get_coalesce) + return -EOPNOTSUPP; + + ethtool_ops->get_coalesce(dev, &coalesce); + + if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) + return -EFAULT; + return 0; +} + +static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) +{ + struct ethtool_coalesce coalesce; + + if (!ethtool_ops->get_coalesce) + return -EOPNOTSUPP; + + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) + return -EFAULT; + + return ethtool_ops->set_coalesce(dev, &coalesce); +} + +static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; + + if (!ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + + ethtool_ops->get_ringparam(dev, &ringparam); + + if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_ringparam ringparam; + + if (!ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + + if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) + return -EFAULT; + + return ethtool_ops->set_ringparam(dev, &ringparam); +} + +static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; + + if (!ethtool_ops->get_pauseparam) + return -EOPNOTSUPP; + + ethtool_ops->get_pauseparam(dev, &pauseparam); + + if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_pauseparam pauseparam; + + if (!ethtool_ops->get_pauseparam) + return -EOPNOTSUPP; + + if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) + return -EFAULT; + + return ethtool_ops->set_pauseparam(dev, &pauseparam); +} + +static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GRXCSUM }; + + if (!ethtool_ops->get_rx_csum) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_rx_csum(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_rx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + ethtool_ops->set_rx_csum(dev, edata.data); + return 0; +} + +static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTXCSUM }; + + if (!ethtool_ops->get_tx_csum) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_tx_csum(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_tx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_tx_csum(dev, edata.data); +} + +static int ethtool_get_sg(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + + if (!ethtool_ops->get_sg) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_sg(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_sg(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_sg) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_sg(dev, edata.data); +} + +static int ethtool_get_tso(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTSO }; + + if (!ethtool_ops->get_tso) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_tso(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_tso(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_tso) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_tso(dev, edata.data); +} + +static int ethtool_self_test(struct net_device *dev, char *useraddr) +{ + struct ethtool_test test; + struct ethtool_ops *ops = ethtool_ops; + u64 *data; + int ret; + + if (!ops->self_test || !ops->self_test_count) + return -EOPNOTSUPP; + + if (copy_from_user(&test, useraddr, sizeof(test))) + return -EFAULT; + + test.len = ops->self_test_count(dev); + data = kmalloc(test.len * sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + ops->self_test(dev, &test, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &test, sizeof(test))) + goto out; + useraddr += sizeof(test); + if (copy_to_user(useraddr, data, test.len * sizeof(u64))) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_get_strings(struct net_device *dev, void *useraddr) +{ + struct ethtool_gstrings gstrings; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->get_strings) + return -EOPNOTSUPP; + + if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) + return -EFAULT; + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } + + data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + if (!data) + return -ENOMEM; + + ops->get_strings(dev, gstrings.string_set, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) + goto out; + useraddr += sizeof(gstrings); + if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_phys_id(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!ethtool_ops->phys_id) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return ethtool_ops->phys_id(dev, id.data); +} + +static int ethtool_get_stats(struct net_device *dev, void *useraddr) +{ + struct ethtool_stats stats; + struct ethtool_ops *ops = ethtool_ops; + u64 *data; + int ret; + + if (!ops->get_ethtool_stats || !ops->get_stats_count) + return -EOPNOTSUPP; + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; + + stats.n_stats = ops->get_stats_count(dev); + data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + ops->get_ethtool_stats(dev, &stats, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) + goto out; + useraddr += sizeof(stats); + if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +/* The main entry point in this file. Called from net/core/dev.c */ + +#define ETHTOOL_OPS_COMPAT +int ethtool_ioctl(struct ifreq *ifr) +{ + struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + void *useraddr = (void *) ifr->ifr_data; + u32 ethcmd; + + /* + * XXX: This can be pushed down into the ethtool_* handlers that + * need it. Keep existing behavior for the moment. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!dev || !netif_device_present(dev)) + return -ENODEV; + + if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_GSET: + return ethtool_get_settings(dev, useraddr); + case ETHTOOL_SSET: + return ethtool_set_settings(dev, useraddr); + case ETHTOOL_GDRVINFO: + return ethtool_get_drvinfo(dev, useraddr); + case ETHTOOL_GREGS: + return ethtool_get_regs(dev, useraddr); + case ETHTOOL_GWOL: + return ethtool_get_wol(dev, useraddr); + case ETHTOOL_SWOL: + return ethtool_set_wol(dev, useraddr); + case ETHTOOL_GMSGLVL: + return ethtool_get_msglevel(dev, useraddr); + case ETHTOOL_SMSGLVL: + return ethtool_set_msglevel(dev, useraddr); + case ETHTOOL_NWAY_RST: + return ethtool_nway_reset(dev); + case ETHTOOL_GLINK: + return ethtool_get_link(dev, useraddr); + case ETHTOOL_GEEPROM: + return ethtool_get_eeprom(dev, useraddr); + case ETHTOOL_SEEPROM: + return ethtool_set_eeprom(dev, useraddr); + case ETHTOOL_GCOALESCE: + return ethtool_get_coalesce(dev, useraddr); + case ETHTOOL_SCOALESCE: + return ethtool_set_coalesce(dev, useraddr); + case ETHTOOL_GRINGPARAM: + return ethtool_get_ringparam(dev, useraddr); + case ETHTOOL_SRINGPARAM: + return ethtool_set_ringparam(dev, useraddr); + case ETHTOOL_GPAUSEPARAM: + return ethtool_get_pauseparam(dev, useraddr); + case ETHTOOL_SPAUSEPARAM: + return ethtool_set_pauseparam(dev, useraddr); + case ETHTOOL_GRXCSUM: + return ethtool_get_rx_csum(dev, useraddr); + case ETHTOOL_SRXCSUM: + return ethtool_set_rx_csum(dev, useraddr); + case ETHTOOL_GTXCSUM: + return ethtool_get_tx_csum(dev, useraddr); + case ETHTOOL_STXCSUM: + return ethtool_set_tx_csum(dev, useraddr); + case ETHTOOL_GSG: + return ethtool_get_sg(dev, useraddr); + case ETHTOOL_SSG: + return ethtool_set_sg(dev, useraddr); + case ETHTOOL_GTSO: + return ethtool_get_tso(dev, useraddr); + case ETHTOOL_STSO: + return ethtool_set_tso(dev, useraddr); + case ETHTOOL_TEST: + return ethtool_self_test(dev, useraddr); + case ETHTOOL_GSTRINGS: + return ethtool_get_strings(dev, useraddr); + case ETHTOOL_PHYS_ID: + return ethtool_phys_id(dev, useraddr); + case ETHTOOL_GSTATS: + return ethtool_get_stats(dev, useraddr); + default: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; +} + +#define mii_if_info _kc_mii_if_info +struct _kc_mii_if_info { + int phy_id; + int advertising; + int phy_id_mask; + int reg_num_mask; + + unsigned int full_duplex : 1; /* is full duplex? */ + unsigned int force_media : 1; /* is autoneg. disabled? */ + + struct net_device *dev; + int (*mdio_read) (struct net_device *dev, int phy_id, int location); + void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val); +}; + +struct ethtool_cmd; +struct mii_ioctl_data; + +#undef mii_link_ok +#define mii_link_ok _kc_mii_link_ok +#undef mii_nway_restart +#define mii_nway_restart _kc_mii_nway_restart +#undef mii_ethtool_gset +#define mii_ethtool_gset _kc_mii_ethtool_gset +#undef mii_ethtool_sset +#define mii_ethtool_sset _kc_mii_ethtool_sset +#undef mii_check_link +#define mii_check_link _kc_mii_check_link +extern int _kc_mii_link_ok (struct mii_if_info *mii); +extern int _kc_mii_nway_restart (struct mii_if_info *mii); +extern int _kc_mii_ethtool_gset(struct mii_if_info *mii, + struct ethtool_cmd *ecmd); +extern int _kc_mii_ethtool_sset(struct mii_if_info *mii, + struct ethtool_cmd *ecmd); +extern void _kc_mii_check_link (struct mii_if_info *mii); +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) +#undef generic_mii_ioctl +#define generic_mii_ioctl _kc_generic_mii_ioctl +extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, + struct mii_ioctl_data *mii_data, int cmd, + unsigned int *duplex_changed); +#endif /* > 2.4.6 */ + + +struct _kc_pci_dev_ext { + struct pci_dev *dev; + void *pci_drvdata; + struct pci_driver *driver; +}; + +struct _kc_net_dev_ext { + struct net_device *dev; + unsigned int carrier; +}; + + +/**************************************/ +/* mii support */ + +int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) +{ + struct net_device *dev = mii->dev; + u32 advert, bmcr, lpa, nego; + + ecmd->supported = + (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); + + /* only supports twisted-pair */ + ecmd->port = PORT_MII; + + /* only supports internal transceiver */ + ecmd->transceiver = XCVR_INTERNAL; + + /* this isn't fully supported at higher layers */ + ecmd->phy_address = mii->phy_id; + + ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII; + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + if (advert & ADVERTISE_10HALF) + ecmd->advertising |= ADVERTISED_10baseT_Half; + if (advert & ADVERTISE_10FULL) + ecmd->advertising |= ADVERTISED_10baseT_Full; + if (advert & ADVERTISE_100HALF) + ecmd->advertising |= ADVERTISED_100baseT_Half; + if (advert & ADVERTISE_100FULL) + ecmd->advertising |= ADVERTISED_100baseT_Full; + + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA); + if (bmcr & BMCR_ANENABLE) { + ecmd->advertising |= ADVERTISED_Autoneg; + ecmd->autoneg = AUTONEG_ENABLE; + + nego = mii_nway_result(advert & lpa); + if (nego == LPA_100FULL || nego == LPA_100HALF) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + if (nego == LPA_100FULL || nego == LPA_10FULL) { + ecmd->duplex = DUPLEX_FULL; + mii->full_duplex = 1; + } else { + ecmd->duplex = DUPLEX_HALF; + mii->full_duplex = 0; + } + } else { + ecmd->autoneg = AUTONEG_DISABLE; + + ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; + ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; + } + + /* ignore maxtxpkt, maxrxpkt for now */ + + return 0; +} + +int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) +{ + struct net_device *dev = mii->dev; + + if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100) + return -EINVAL; + if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL) + return -EINVAL; + if (ecmd->port != PORT_MII) + return -EINVAL; + if (ecmd->transceiver != XCVR_INTERNAL) + return -EINVAL; + if (ecmd->phy_address != mii->phy_id) + return -EINVAL; + if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + /* ignore supported, maxtxpkt, maxrxpkt */ + + if (ecmd->autoneg == AUTONEG_ENABLE) { + u32 bmcr, advert, tmp; + + if ((ecmd->advertising & (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full)) == 0) + return -EINVAL; + + /* advertise only what has been requested */ + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + if (ADVERTISED_10baseT_Half) + tmp |= ADVERTISE_10HALF; + if (ADVERTISED_10baseT_Full) + tmp |= ADVERTISE_10FULL; + if (ADVERTISED_100baseT_Half) + tmp |= ADVERTISE_100HALF; + if (ADVERTISED_100baseT_Full) + tmp |= ADVERTISE_100FULL; + if (advert != tmp) { + mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); + mii->advertising = tmp; + } + + /* turn on autonegotiation, and force a renegotiate */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr); + + mii->force_media = 0; + } else { + u32 bmcr, tmp; + + /* turn off auto negotiation, set speed and duplexity */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX); + if (ecmd->speed == SPEED_100) + tmp |= BMCR_SPEED100; + if (ecmd->duplex == DUPLEX_FULL) { + tmp |= BMCR_FULLDPLX; + mii->full_duplex = 1; + } else + mii->full_duplex = 0; + if (bmcr != tmp) + mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); + + mii->force_media = 1; + } + return 0; +} + +int _kc_mii_link_ok (struct mii_if_info *mii) +{ + /* first, a dummy read, needed to latch some MII phys */ + mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR); + if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS) + return 1; + return 0; +} + +int _kc_mii_nway_restart (struct mii_if_info *mii) +{ + int bmcr; + int r = -EINVAL; + + /* if autoneg is off, it's an error */ + bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR); + + if (bmcr & BMCR_ANENABLE) { + bmcr |= BMCR_ANRESTART; + mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr); + r = 0; + } + + return r; +} + +void _kc_mii_check_link (struct mii_if_info *mii) +{ + int cur_link = mii_link_ok(mii); + int prev_link = netif_carrier_ok(mii->dev); + + if (cur_link && !prev_link) + netif_carrier_on(mii->dev); + else if (prev_link && !cur_link) + netif_carrier_off(mii->dev); +} + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) +int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, + struct mii_ioctl_data *mii_data, int cmd, + unsigned int *duplex_chg_out) +{ + int rc = 0; + unsigned int duplex_changed = 0; + + if (duplex_chg_out) + *duplex_chg_out = 0; + + mii_data->phy_id &= mii_if->phy_id_mask; + mii_data->reg_num &= mii_if->reg_num_mask; + + switch(cmd) { + case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ + case SIOCGMIIPHY: + mii_data->phy_id = mii_if->phy_id; + /* fall through */ + + case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */ + case SIOCGMIIREG: + mii_data->val_out = + mii_if->mdio_read(mii_if->dev, mii_data->phy_id, + mii_data->reg_num); + break; + + case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */ + case SIOCSMIIREG: { + u16 val = mii_data->val_in; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == mii_if->phy_id) { + switch(mii_data->reg_num) { + case MII_BMCR: { + unsigned int new_duplex = 0; + if (val & (BMCR_RESET|BMCR_ANENABLE)) + mii_if->force_media = 0; + else + mii_if->force_media = 1; + if (mii_if->force_media && + (val & BMCR_FULLDPLX)) + new_duplex = 1; + if (mii_if->full_duplex != new_duplex) { + duplex_changed = 1; + mii_if->full_duplex = new_duplex; + } + break; + } + case MII_ADVERTISE: + mii_if->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + mii_if->mdio_write(mii_if->dev, mii_data->phy_id, + mii_data->reg_num, val); + break; + } + + default: + rc = -EOPNOTSUPP; + break; + } + + if ((rc == 0) && (duplex_chg_out) && (duplex_changed)) + *duplex_chg_out = 1; + + return rc; +} +#endif /* > 2.4.6 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING new file mode 100644 index 00000000..5f297e5b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING @@ -0,0 +1,339 @@ + +"This software program is licensed subject to the GNU General Public License +(GPL). Version 2, June 1991, available at +" + +GNU General Public License + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public License is intended +to guarantee your freedom to share and change free software--to make sure +the software is free for all its users. This General Public License applies +to most of the Free Software Foundation's software and to any other program +whose authors commit to using it. (Some other Free Software Foundation +software is covered by the GNU Library General Public License instead.) You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. These +restrictions translate to certain responsibilities for you if you distribute +copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its +recipients to know that what they have is not the original, so that any +problems introduced by others will not reflect on the original authors' +reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program +proprietary. To prevent this, we have made it clear that any patent must be +licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice + placed by the copyright holder saying it may be distributed under the + terms of this General Public License. The "Program", below, refers to any + such program or work, and a "work based on the Program" means either the + Program or any derivative work under copyright law: that is to say, a + work containing the Program or a portion of it, either verbatim or with + modifications and/or translated into another language. (Hereinafter, + translation is included without limitation in the term "modification".) + Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + the Program is not restricted, and the output from the Program is covered + only if its contents constitute a work based on the Program (independent + of having been made by running the Program). Whether that is true depends + on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code + as you receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice and + disclaimer of warranty; keep intact all the notices that refer to this + License and to the absence of any warranty; and give any other recipients + of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you + may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, + thus forming a work based on the Program, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + * a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + * b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any part + thereof, to be licensed as a whole at no charge to all third parties + under the terms of this License. + + * c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive + use in the most ordinary way, to print or display an announcement + including an appropriate copyright notice and a notice that there is + no warranty (or else, saying that you provide a warranty) and that + users may redistribute the program under these conditions, and + telling the user how to view a copy of this License. (Exception: if + the Program itself is interactive but does not normally print such + an announcement, your work based on the Program is not required to + print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable + sections of that work are not derived from the Program, and can be + reasonably considered independent and separate works in themselves, then + this License, and its terms, do not apply to those sections when you + distribute them as separate works. But when you distribute the same + sections as part of a whole which is a work based on the Program, the + distribution of the whole must be on the terms of this License, whose + permissions for other licensees extend to the entire whole, and thus to + each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program + with the Program (or with a work based on the Program) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under + Section 2) in object code or executable form under the terms of Sections + 1 and 2 above provided that you also do one of the following: + + * a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 + above on a medium customarily used for software interchange; or, + + * b) Accompany it with a written offer, valid for at least three years, + to give any third party, for a charge no more than your cost of + physically performing source distribution, a complete machine- + readable copy of the corresponding source code, to be distributed + under the terms of Sections 1 and 2 above on a medium customarily + used for software interchange; or, + + * c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed + only for noncommercial distribution and only if you received the + program in object code or executable form with such an offer, in + accord with Subsection b above.) + + The source code for a work means the preferred form of the work for + making modifications to it. For an executable work, complete source code + means all the source code for all modules it contains, plus any + associated interface definition files, plus the scripts used to control + compilation and installation of the executable. However, as a special + exception, the source code distributed need not include anything that is + normally distributed (in either source or binary form) with the major + components (compiler, kernel, and so on) of the operating system on which + the executable runs, unless that component itself accompanies the + executable. + + If distribution of executable or object code is made by offering access + to copy from a designated place, then offering equivalent access to copy + the source code from the same place counts as distribution of the source + code, even though third parties are not compelled to copy the source + along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as + expressly provided under this License. Any attempt otherwise to copy, + modify, sublicense or distribute the Program is void, and will + automatically terminate your rights under this License. However, parties + who have received copies, or rights, from you under this License will not + have their licenses terminated so long as such parties remain in full + compliance. + +5. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Program or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Program (or any work based on the Program), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Program or works + based on it. + +6. Each time you redistribute the Program (or any work based on the + Program), the recipient automatically receives a license from the + original licensor to copy, distribute or modify the Program subject to + these terms and conditions. You may not impose any further restrictions + on the recipients' exercise of the rights granted herein. You are not + responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot distribute + so as to satisfy simultaneously your obligations under this License and + any other pertinent obligations, then as a consequence you may not + distribute the Program at all. For example, if a patent license would + not permit royalty-free redistribution of the Program by all those who + receive copies directly or indirectly through you, then the only way you + could satisfy both it and this License would be to refrain entirely from + distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any + particular circumstance, the balance of the section is intended to apply + and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system, which is implemented + by public license practices. Many people have made generous contributions + to the wide range of software distributed through that system in + reliance on consistent application of that system; it is up to the + author/donor to decide if he or she is willing to distribute software + through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be + a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Program under this License may add an + explicit geographical distribution limitation excluding those countries, + so that distribution is permitted only in or among countries not thus + excluded. In such case, this License incorporates the limitation as if + written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of + the General Public License from time to time. Such new versions will be + similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Program does not specify a version + number of this License, you may choose any version ever published by the + Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs + whose distribution conditions are different, write to the author to ask + for permission. For software which is copyrighted by the Free Software + Foundation, write to the Free Software Foundation; we sometimes make + exceptions for this. Our decision will be guided by the two goals of + preserving the free status of all derivatives of our free software and + of promoting the sharing and reuse of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH + YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it free +software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey the +exclusion of warranty; and each file should have at least the "copyright" +line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free +software, and you are welcome to redistribute it under certain conditions; +type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program +'Gnomovision' (which makes passes at compilers) written by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h new file mode 100644 index 00000000..222c2c71 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h @@ -0,0 +1,925 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_H_ +#define _IXGBE_H_ + +#ifndef IXGBE_NO_LRO +#include +#endif + +#include +#include +#ifdef HAVE_IRQ_AFFINITY_HINT +#include +#endif /* HAVE_IRQ_AFFINITY_HINT */ +#include + +#ifdef SIOCETHTOOL +#include +#endif +#ifdef NETIF_F_HW_VLAN_TX +#include +#endif +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +#define IXGBE_DCA +#include +#endif +#include "ixgbe_dcb.h" + +#include "kcompat.h" + +#ifdef HAVE_SCTP +#include +#endif + +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#define IXGBE_FCOE +#include "ixgbe_fcoe.h" +#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ + +#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE) +#define HAVE_IXGBE_PTP +#endif + +#include "ixgbe_api.h" + +#define PFX "ixgbe: " +#define DPRINTK(nlevel, klevel, fmt, args...) \ + ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ + printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ + __func__ , ## args))) + +/* TX/RX descriptor defines */ +#define IXGBE_DEFAULT_TXD 512 +#define IXGBE_DEFAULT_TX_WORK 256 +#define IXGBE_MAX_TXD 4096 +#define IXGBE_MIN_TXD 64 + +#define IXGBE_DEFAULT_RXD 512 +#define IXGBE_DEFAULT_RX_WORK 256 +#define IXGBE_MAX_RXD 4096 +#define IXGBE_MIN_RXD 64 + + +/* flow control */ +#define IXGBE_MIN_FCRTL 0x40 +#define IXGBE_MAX_FCRTL 0x7FF80 +#define IXGBE_MIN_FCRTH 0x600 +#define IXGBE_MAX_FCRTH 0x7FFF0 +#define IXGBE_DEFAULT_FCPAUSE 0xFFFF +#define IXGBE_MIN_FCPAUSE 0 +#define IXGBE_MAX_FCPAUSE 0xFFFF + +/* Supported Rx Buffer Sizes */ +#define IXGBE_RXBUFFER_512 512 /* Used for packet split */ +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#define IXGBE_RXBUFFER_1536 1536 +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 +#define IXGBE_RXBUFFER_7K 7168 +#define IXGBE_RXBUFFER_8K 8192 +#define IXGBE_RXBUFFER_15K 15360 +#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */ +#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */ + +/* + * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we + * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, + * this adds up to 512 bytes of extra data meaning the smallest allocation + * we could have is 1K. + * i.e. RXBUFFER_512 --> size-1024 slab + */ +#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512 + +#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) + +/* How many Rx Buffers do we bundle into one write to the hardware ? */ +#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +#define IXGBE_TX_FLAGS_CSUM (u32)(1) +#define IXGBE_TX_FLAGS_HW_VLAN (u32)(1 << 1) +#define IXGBE_TX_FLAGS_SW_VLAN (u32)(1 << 2) +#define IXGBE_TX_FLAGS_TSO (u32)(1 << 3) +#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 4) +#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5) +#define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) +#define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) +#define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 +#define IXGBE_TX_FLAGS_VLAN_SHIFT 16 + +#define IXGBE_MAX_RX_DESC_POLL 10 + +#define IXGBE_MAX_VF_MC_ENTRIES 30 +#define IXGBE_MAX_VF_FUNCTIONS 64 +#define IXGBE_MAX_VFTA_ENTRIES 128 +#define MAX_EMULATION_MAC_ADDRS 16 +#define IXGBE_MAX_PF_MACVLANS 15 +#define IXGBE_82599_VF_DEVICE_ID 0x10ED +#define IXGBE_X540_VF_DEVICE_ID 0x1515 + +#ifdef CONFIG_PCI_IOV +#define VMDQ_P(p) ((p) + adapter->num_vfs) +#else +#define VMDQ_P(p) (p) +#endif + +#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ + { \ + u32 current_counter = IXGBE_READ_REG(hw, reg); \ + if (current_counter < last_counter) \ + counter += 0x100000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFFF00000000LL; \ + counter |= current_counter; \ + } + +#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \ + { \ + u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \ + u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \ + u64 current_counter = (current_counter_msb << 32) | \ + current_counter_lsb; \ + if (current_counter < last_counter) \ + counter += 0x1000000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFF000000000LL; \ + counter |= current_counter; \ + } + +struct vf_stats { + u64 gprc; + u64 gorc; + u64 gptc; + u64 gotc; + u64 mprc; +}; + +struct vf_data_storage { + unsigned char vf_mac_addresses[ETH_ALEN]; + u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; + u16 num_vf_mc_hashes; + u16 default_vf_vlan_id; + u16 vlans_enabled; + bool clear_to_send; + struct vf_stats vfstats; + struct vf_stats last_vfstats; + struct vf_stats saved_rst_vfstats; + bool pf_set_mac; + u16 pf_vlan; /* When set, guest VLAN config not allowed. */ + u16 pf_qos; + u16 tx_rate; + u16 vlan_count; + u8 spoofchk_enabled; + struct pci_dev *vfdev; +}; + +struct vf_macvlans { + struct list_head l; + int vf; + bool free; + bool is_macvlan; + u8 vf_macvlan[ETH_ALEN]; +}; + +#ifndef IXGBE_NO_LRO +#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/ +#define IXGBE_LRO_GLOBAL 10 + +struct ixgbe_lro_stats { + u32 flushed; + u32 coal; +}; + +/* + * ixgbe_lro_header - header format to be aggregated by LRO + * @iph: IP header without options + * @tcp: TCP header + * @ts: Optional TCP timestamp data in TCP options + * + * This structure relies on the check above that verifies that the header + * is IPv4 and does not contain any options. + */ +struct ixgbe_lrohdr { + struct iphdr iph; + struct tcphdr th; + __be32 ts[0]; +}; + +struct ixgbe_lro_list { + struct sk_buff_head active; + struct ixgbe_lro_stats stats; +}; + +#endif /* IXGBE_NO_LRO */ +#define IXGBE_MAX_TXD_PWR 14 +#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD) +#ifdef MAX_SKB_FRAGS +#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) +#else +#define DESC_NEEDED 4 +#endif + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer */ +struct ixgbe_tx_buffer { + union ixgbe_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + unsigned short gso_segs; + __be16 protocol; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct ixgbe_rx_buffer { + struct sk_buff *skb; + dma_addr_t dma; +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + struct page *page; + unsigned int page_offset; +#endif +}; + +struct ixgbe_queue_stats { + u64 packets; + u64 bytes; +}; + +struct ixgbe_tx_queue_stats { + u64 restart_queue; + u64 tx_busy; + u64 tx_done_old; +}; + +struct ixgbe_rx_queue_stats { + u64 rsc_count; + u64 rsc_flush; + u64 non_eop_descs; + u64 alloc_rx_page_failed; + u64 alloc_rx_buff_failed; + u64 csum_err; +}; + +enum ixgbe_ring_state_t { + __IXGBE_TX_FDIR_INIT_DONE, + __IXGBE_TX_DETECT_HANG, + __IXGBE_HANG_CHECK_ARMED, + __IXGBE_RX_RSC_ENABLED, +#ifndef HAVE_NDO_SET_FEATURES + __IXGBE_RX_CSUM_ENABLED, +#endif + __IXGBE_RX_CSUM_UDP_ZERO_ERR, +#ifdef IXGBE_FCOE + __IXGBE_RX_FCOE_BUFSZ, +#endif +}; + +#define check_for_tx_hang(ring) \ + test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#define set_check_for_tx_hang(ring) \ + set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#define clear_check_for_tx_hang(ring) \ + clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#ifndef IXGBE_NO_HW_RSC +#define ring_is_rsc_enabled(ring) \ + test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#else +#define ring_is_rsc_enabled(ring) false +#endif +#define set_ring_rsc_enabled(ring) \ + set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#define clear_ring_rsc_enabled(ring) \ + clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#define netdev_ring(ring) (ring->netdev) +#define ring_queue_index(ring) (ring->queue_index) + + +struct ixgbe_ring { + struct ixgbe_ring *next; /* pointer to next ring in q_vector */ + struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ + struct net_device *netdev; /* netdev ring belongs to */ + struct device *dev; /* device for DMA mapping */ + void *desc; /* descriptor ring memory */ + union { + struct ixgbe_tx_buffer *tx_buffer_info; + struct ixgbe_rx_buffer *rx_buffer_info; + }; + unsigned long state; + u8 __iomem *tail; + dma_addr_t dma; /* phys. address of descriptor ring */ + unsigned int size; /* length in bytes */ + + u16 count; /* amount of descriptors */ + + u8 queue_index; /* needed for multiqueue queue management */ + u8 reg_idx; /* holds the special value that gets + * the hardware register offset + * associated with this ring, which is + * different for DCB and RSS modes + */ + u16 next_to_use; + u16 next_to_clean; + + union { +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + u16 rx_buf_len; +#else + u16 next_to_alloc; +#endif + struct { + u8 atr_sample_rate; + u8 atr_count; + }; + }; + + u8 dcb_tc; + struct ixgbe_queue_stats stats; + union { + struct ixgbe_tx_queue_stats tx_stats; + struct ixgbe_rx_queue_stats rx_stats; + }; +} ____cacheline_internodealigned_in_smp; + +enum ixgbe_ring_f_enum { + RING_F_NONE = 0, + RING_F_VMDQ, /* SR-IOV uses the same ring feature */ + RING_F_RSS, + RING_F_FDIR, +#ifdef IXGBE_FCOE + RING_F_FCOE, +#endif /* IXGBE_FCOE */ + RING_F_ARRAY_SIZE /* must be last in enum set */ +}; + +#define IXGBE_MAX_DCB_INDICES 8 +#define IXGBE_MAX_RSS_INDICES 16 +#define IXGBE_MAX_VMDQ_INDICES 64 +#define IXGBE_MAX_FDIR_INDICES 64 +#ifdef IXGBE_FCOE +#define IXGBE_MAX_FCOE_INDICES 8 +#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#else +#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES +#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES +#endif /* IXGBE_FCOE */ +struct ixgbe_ring_feature { + int indices; + int mask; +}; + +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +/* + * FCoE requires that all Rx buffers be over 2200 bytes in length. Since + * this is twice the size of a half page we need to double the page order + * for FCoE enabled Rx queues. + */ +#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ + return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; +} +#else +#define ixgbe_rx_pg_order(_ring) 0 +#endif +#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) + +#endif +struct ixgbe_ring_container { + struct ixgbe_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +/* iterator for handling rings in ring container */ +#define ixgbe_for_each_ring(pos, head) \ + for (pos = (head).ring; pos != NULL; pos = pos->next) + +#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + ? 8 : 1) +#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS + +/* MAX_MSIX_Q_VECTORS of these are allocated, + * but we only use one per queue-specific vector. + */ +struct ixgbe_q_vector { + struct ixgbe_adapter *adapter; + int cpu; /* CPU for DCA */ + u16 v_idx; /* index of q_vector within array, also used for + * finding the bit in EICR and friends that + * represents the vector for this ring */ + u16 itr; /* Interrupt throttle rate written to EITR */ + struct ixgbe_ring_container rx, tx; + +#ifdef CONFIG_IXGBE_NAPI + struct napi_struct napi; +#endif +#ifndef HAVE_NETDEV_NAPI_LIST + struct net_device poll_dev; +#endif +#ifdef HAVE_IRQ_AFFINITY_HINT + cpumask_t affinity_mask; +#endif +#ifndef IXGBE_NO_LRO + struct ixgbe_lro_list lrolist; /* LRO list for queue vector*/ +#endif + int numa_node; + char name[IFNAMSIZ + 9]; + + /* for dynamic allocation of rings associated with this q_vector */ + struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +/* + * microsecond values for various ITR rates shifted by 2 to fit itr register + * with the first 3 bits reserved 0 + */ +#define IXGBE_MIN_RSC_ITR 24 +#define IXGBE_100K_ITR 40 +#define IXGBE_20K_ITR 200 +#define IXGBE_16K_ITR 248 +#define IXGBE_10K_ITR 400 +#define IXGBE_8K_ITR 500 + +/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */ +static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +/* ixgbe_desc_unused - calculate if we have unused descriptors */ +static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; +} + +#define IXGBE_RX_DESC(R, i) \ + (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i])) +#define IXGBE_TX_DESC(R, i) \ + (&(((union ixgbe_adv_tx_desc *)((R)->desc))[i])) +#define IXGBE_TX_CTXTDESC(R, i) \ + (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i])) + +#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +#ifdef IXGBE_FCOE +/* use 3K as the baby jumbo frame size for FCoE */ +#define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072 +#endif /* IXGBE_FCOE */ + +#define TCP_TIMER_VECTOR 0 +#define OTHER_VECTOR 1 +#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR) + +#define IXGBE_MAX_MSIX_Q_VECTORS_82599 64 +#define IXGBE_MAX_MSIX_Q_VECTORS_82598 16 + +struct ixgbe_mac_addr { + u8 addr[ETH_ALEN]; + u16 queue; + u16 state; /* bitmask */ +}; +#define IXGBE_MAC_STATE_DEFAULT 0x1 +#define IXGBE_MAC_STATE_MODIFIED 0x2 +#define IXGBE_MAC_STATE_IN_USE 0x4 + +#ifdef IXGBE_PROCFS +struct ixgbe_therm_proc_data { + struct ixgbe_hw *hw; + struct ixgbe_thermal_diode_data *sensor_data; +}; + +#endif /* IXGBE_PROCFS */ + +/* + * Only for array allocations in our adapter struct. On 82598, there will be + * unused entries in the array, but that's not a big deal. Also, in 82599, + * we can actually assign 64 queue vectors based on our extended-extended + * interrupt registers. This is different than 82598, which is limited to 16. + */ +#define MAX_MSIX_Q_VECTORS IXGBE_MAX_MSIX_Q_VECTORS_82599 +#define MAX_MSIX_COUNT IXGBE_MAX_MSIX_VECTORS_82599 + +#define MIN_MSIX_Q_VECTORS 1 +#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS) + +/* default to trying for four seconds */ +#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) + +/* board specific private data structure */ +struct ixgbe_adapter { +#ifdef NETIF_F_HW_VLAN_TX +#ifdef HAVE_VLAN_RX_REGISTER + struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */ +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif +#endif /* NETIF_F_HW_VLAN_TX */ + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + unsigned long state; + + /* Some features need tri-state capability, + * thus the additional *_CAPABLE flags. + */ + u32 flags; +#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0) +#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1) +#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2) +#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3) +#ifndef IXGBE_NO_LLI +#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 4) +#endif +#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 8) +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 9) +#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 10) +#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 11) +#else +#define IXGBE_FLAG_DCA_ENABLED (u32)0 +#define IXGBE_FLAG_DCA_CAPABLE (u32)0 +#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)0 +#endif +#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 12) +#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 13) +#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 14) +#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 15) +#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 16) +#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 18) +#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 19) +#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 20) +#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 21) +#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 22) +#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 23) +#ifdef IXGBE_FCOE +#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 24) +#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 25) +#endif /* IXGBE_FCOE */ +#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 26) +#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 27) +#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 28) +#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 29) +#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE (u32)(1 << 30) +#define IXGBE_FLAG_RX_BB_CAPABLE (u32)(1 << 31) + + u32 flags2; +#ifndef IXGBE_NO_HW_RSC +#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1) +#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1) +#else +#define IXGBE_FLAG2_RSC_CAPABLE 0 +#define IXGBE_FLAG2_RSC_ENABLED 0 +#endif +#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 2) +#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 4) +#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 5) +#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 6) +#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 7) +#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 8) +#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 9) +#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 10) +#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 11) +#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 12) + + /* Tx fast path data */ + int num_tx_queues; + u16 tx_itr_setting; + u16 tx_work_limit; + + /* Rx fast path data */ + int num_rx_queues; + u16 rx_itr_setting; + u16 rx_work_limit; + + /* TX */ + struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; + + u64 restart_queue; + u64 lsc_int; + u32 tx_timeout_count; + + /* RX */ + struct ixgbe_ring *rx_ring[MAX_RX_QUEUES]; + int num_rx_pools; /* == num_rx_queues in 82598 */ + int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */ + u64 hw_csum_rx_error; + u64 hw_rx_no_dma_resources; + u64 rsc_total_count; + u64 rsc_total_flush; + u64 non_eop_descs; +#ifndef CONFIG_IXGBE_NAPI + u64 rx_dropped_backlog; /* count drops from rx intr handler */ +#endif + u32 alloc_rx_page_failed; + u32 alloc_rx_buff_failed; + + struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; + +#ifdef HAVE_DCBNL_IEEE + struct ieee_pfc *ixgbe_ieee_pfc; + struct ieee_ets *ixgbe_ieee_ets; +#endif + struct ixgbe_dcb_config dcb_cfg; + struct ixgbe_dcb_config temp_dcb_cfg; + u8 dcb_set_bitmap; + u8 dcbx_cap; +#ifndef HAVE_MQPRIO + u8 tc; +#endif + enum ixgbe_fc_mode last_lfc_mode; + + int num_msix_vectors; + int max_msix_q_vectors; /* true count of q_vectors for device */ + struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE]; + struct msix_entry *msix_entries; + +#ifndef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats net_stats; +#endif +#ifndef IXGBE_NO_LRO + struct ixgbe_lro_stats lro_stats; +#endif + +#ifdef ETHTOOL_TEST + u32 test_icr; + struct ixgbe_ring test_tx_ring; + struct ixgbe_ring test_rx_ring; +#endif + + /* structs defined in ixgbe_hw.h */ + struct ixgbe_hw hw; + u16 msg_enable; + struct ixgbe_hw_stats stats; +#ifndef IXGBE_NO_LLI + u32 lli_port; + u32 lli_size; + u32 lli_etype; + u32 lli_vlan_pri; +#endif /* IXGBE_NO_LLI */ + + u32 *config_space; + u64 tx_busy; + unsigned int tx_ring_count; + unsigned int rx_ring_count; + + u32 link_speed; + bool link_up; + unsigned long link_check_timeout; + + struct timer_list service_timer; + struct work_struct service_task; + + struct hlist_head fdir_filter_list; + unsigned long fdir_overflow; /* number of times ATR was backed off */ + union ixgbe_atr_input fdir_mask; + int fdir_filter_count; + u32 fdir_pballoc; + u32 atr_sample_rate; + spinlock_t fdir_perfect_lock; + +#ifdef IXGBE_FCOE + struct ixgbe_fcoe fcoe; +#endif /* IXGBE_FCOE */ + u32 wol; + + u16 bd_number; + + char eeprom_id[32]; + u16 eeprom_cap; + bool netdev_registered; + u32 interrupt_event; +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + u32 led_reg; +#endif + + DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + int vf_rate_link_speed; + struct vf_macvlans vf_mvs; + struct vf_macvlans *mv_list; +#ifdef CONFIG_PCI_IOV + u32 timer_event_accumulator; + u32 vferr_refcount; +#endif + struct ixgbe_mac_addr *mac_table; +#ifdef IXGBE_SYSFS + struct kobject *info_kobj; + struct kobject *therm_kobj[IXGBE_MAX_SENSORS]; +#else /* IXGBE_SYSFS */ +#ifdef IXGBE_PROCFS + struct proc_dir_entry *eth_dir; + struct proc_dir_entry *info_dir; + struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS]; + struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS]; +#endif /* IXGBE_PROCFS */ +#endif /* IXGBE_SYSFS */ +}; + +struct ixgbe_fdir_filter { + struct hlist_node fdir_node; + union ixgbe_atr_input filter; + u16 sw_idx; + u16 action; +}; + +enum ixgbe_state_t { + __IXGBE_TESTING, + __IXGBE_RESETTING, + __IXGBE_DOWN, + __IXGBE_SERVICE_SCHED, + __IXGBE_IN_SFP_INIT, +}; + +struct ixgbe_cb { +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + union { /* Union defining head/tail partner */ + struct sk_buff *head; + struct sk_buff *tail; + }; +#endif + dma_addr_t dma; +#ifndef IXGBE_NO_LRO + __be32 tsecr; /* timestamp echo response */ + u32 tsval; /* timestamp value in host order */ + u32 next_seq; /* next expected sequence number */ + u16 free; /* 65521 minus total size */ + u16 mss; /* size of data portion of packet */ +#endif /* IXGBE_NO_LRO */ +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid; /* VLAN tag */ +#endif + u16 append_cnt; /* number of skb's appended */ +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + bool page_released; +#endif +}; +#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) + +#ifdef IXGBE_SYSFS +void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter); +int ixgbe_sysfs_init(struct ixgbe_adapter *adapter); +#endif /* IXGBE_SYSFS */ +#ifdef IXGBE_PROCFS +void ixgbe_procfs_exit(struct ixgbe_adapter *adapter); +int ixgbe_procfs_init(struct ixgbe_adapter *adapter); +int ixgbe_procfs_topdir_init(void); +void ixgbe_procfs_topdir_exit(void); +#endif /* IXGBE_PROCFS */ + +extern struct dcbnl_rtnl_ops dcbnl_ops; +extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max); + +extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index); + +/* needed by ixgbe_main.c */ +extern int ixgbe_validate_mac_addr(u8 *mc_addr); +extern void ixgbe_check_options(struct ixgbe_adapter *adapter); +extern void ixgbe_assign_netdev_ops(struct net_device *netdev); + +/* needed by ixgbe_ethtool.c */ +extern char ixgbe_driver_name[]; +extern const char ixgbe_driver_version[]; + +extern void ixgbe_up(struct ixgbe_adapter *adapter); +extern void ixgbe_down(struct ixgbe_adapter *adapter); +extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); +extern void ixgbe_reset(struct ixgbe_adapter *adapter); +extern void ixgbe_set_ethtool_ops(struct net_device *netdev); +extern int ixgbe_setup_rx_resources(struct ixgbe_ring *); +extern int ixgbe_setup_tx_resources(struct ixgbe_ring *); +extern void ixgbe_free_rx_resources(struct ixgbe_ring *); +extern void ixgbe_free_tx_resources(struct ixgbe_ring *); +extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_update_stats(struct ixgbe_adapter *adapter); +extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); +extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); +extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev); +extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, + struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *, + struct ixgbe_tx_buffer *); +extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16); +extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_set_rx_mode(struct net_device *netdev); +extern int ixgbe_write_mc_addr_list(struct net_device *netdev); +extern int ixgbe_setup_tc(struct net_device *dev, u8 tc); +#ifdef IXGBE_FCOE +extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32); +#endif /* IXGBE_FCOE */ +extern void ixgbe_do_reset(struct net_device *netdev); +extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector); +extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter); +extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter); +#ifdef ETHTOOL_OPS_COMPAT +extern int ethtool_ioctl(struct ifreq *ifr); +#endif + +#ifdef IXGBE_FCOE +extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter); +extern int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + u8 *hdr_len); +extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter); +extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb); +extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid, + struct scatterlist *sgl, unsigned int sgc); +#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid, + struct scatterlist *sgl, unsigned int sgc); +#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */ +extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid); +#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE +extern int ixgbe_fcoe_enable(struct net_device *netdev); +extern int ixgbe_fcoe_disable(struct net_device *netdev); +#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ +#ifdef CONFIG_DCB +#ifdef HAVE_DCBNL_OPS_GETAPP +extern u8 ixgbe_fcoe_getapp(struct net_device *netdev); +#endif /* HAVE_DCBNL_OPS_GETAPP */ +extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up); +#endif /* CONFIG_DCB */ +#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN +extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type); +#endif +#endif /* IXGBE_FCOE */ + +#ifdef CONFIG_DCB +#ifdef HAVE_DCBNL_IEEE +s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame); +#endif /* HAVE_DCBNL_IEEE */ +#endif /* CONFIG_DCB */ + +extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring); +extern int ixgbe_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd); +extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, + struct net_device *netdev, unsigned int vfn); +extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); +extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); +extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); +extern int ixgbe_available_rars(struct ixgbe_adapter *adapter); +#ifndef HAVE_VLAN_RX_REGISTER +extern void ixgbe_vlan_mode(struct net_device *, u32); +#endif +#ifndef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc]) +#endif +extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); +#endif /* _IXGBE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c new file mode 100644 index 00000000..24015844 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c @@ -0,0 +1,1296 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_type.h" +#include "ixgbe_82598.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); +static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw); +static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete); +static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *link_up, + bool link_up_wait_to_complete); +static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw); +static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw); +static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, + u32 headroom, int strategy); + +/** + * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82598 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. To address this we need to + * increase the value to either 10ms to 250ms for capability version 1 config, + * or 16ms to 55ms for version 2. + **/ +void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) +{ + u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 250ms through the GCR register + */ + if (!(gcr & IXGBE_GCR_CAP_VER2)) { + gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2); + pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; + IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; + IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); +} + +/** + * ixgbe_init_ops_82598 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for 82598. + * Does not touch the hardware. + **/ +s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val; + + ret_val = ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + /* PHY */ + phy->ops.init = &ixgbe_init_phy_ops_82598; + + /* MAC */ + mac->ops.start_hw = &ixgbe_start_hw_82598; + mac->ops.reset_hw = &ixgbe_reset_hw_82598; + mac->ops.get_media_type = &ixgbe_get_media_type_82598; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_82598; + mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598; + mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598; + mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_82598; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598; + mac->ops.set_vfta = &ixgbe_set_vfta_82598; + mac->ops.set_vlvf = NULL; + mac->ops.clear_vfta = &ixgbe_clear_vfta_82598; + + /* Flow Control */ + mac->ops.fc_enable = &ixgbe_fc_enable_82598; + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 16; + mac->rx_pb_size = 512; + mac->max_tx_queues = 32; + mac->max_rx_queues = 64; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + /* SFP+ Module */ + phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598; + + /* Link */ + mac->ops.check_link = &ixgbe_check_mac_link_82598; + mac->ops.setup_link = &ixgbe_setup_mac_link_82598; + mac->ops.flap_tx_laser = NULL; + mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = NULL; + + return ret_val; +} + +/** + * ixgbe_init_phy_ops_82598 - PHY/SFP specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY/SFP type was + * not known. Perform the SFP init if necessary. + * + **/ +s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 list_offset, data_offset; + + /* Identify the PHY */ + phy->ops.identify(hw); + + /* Overwrite the link function pointers if copper PHY */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { + mac->ops.setup_link = &ixgbe_setup_copper_link_82598; + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + } + + switch (hw->phy.type) { + case ixgbe_phy_tn: + phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; + phy->ops.check_link = &ixgbe_check_phy_link_tnx; + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_tnx; + break; + case ixgbe_phy_nl: + phy->ops.reset = &ixgbe_reset_phy_nl; + + /* Call SFP+ identify routine to get the SFP+ module type */ + ret_val = phy->ops.identify_sfp(hw); + if (ret_val != 0) + goto out; + else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) { + ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + + /* Check to see if SFP+ module is supported */ + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, + &list_offset, + &data_offset); + if (ret_val != 0) { + ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + break; + default: + break; + } + +out: + return ret_val; +} + +/** + * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function. + * Disables relaxed ordering Then set pcie completion timeout + * + **/ +s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) +{ + u32 regval; + u32 i; + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + + /* Disable relaxed ordering */ + for (i = 0; ((i < hw->mac.max_tx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); + } + + for (i = 0; ((i < hw->mac.max_rx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | + IXGBE_DCA_RXCTRL_HEAD_WRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); + } + + /* set the completion timeout for interface */ + if (ret_val == 0) + ixgbe_set_pcie_completion_timeout(hw); + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_82598 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: boolean auto-negotiation value + * + * Determines the link capabilities by reading the AUTOC register. + **/ +static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + s32 status = 0; + u32 autoc = 0; + + /* + * Determine link capabilities based on the stored value of AUTOC, + * which represents EEPROM defaults. If AUTOC value has not been + * stored, use the current register value. + */ + if (hw->mac.orig_link_settings_stored) + autoc = hw->mac.orig_autoc; + else + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = false; + break; + + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *autoneg = false; + break; + + case IXGBE_AUTOC_LMS_1G_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = true; + break; + + case IXGBE_AUTOC_LMS_KX4_AN: + case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = true; + break; + + default: + status = IXGBE_ERR_LINK_SETUP; + break; + } + + return status; +} + +/** + * ixgbe_get_media_type_82598 - Determines media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw) +{ + enum ixgbe_media_type media_type; + + /* Detect if there is a copper PHY attached. */ + switch (hw->phy.type) { + case ixgbe_phy_cu_unknown: + case ixgbe_phy_tn: + media_type = ixgbe_media_type_copper; + goto out; + default: + break; + } + + /* Media type for I82598 is based on device ID */ + switch (hw->device_id) { + case IXGBE_DEV_ID_82598: + case IXGBE_DEV_ID_82598_BX: + /* Default device ID is mezzanine card KX/KX4 */ + media_type = ixgbe_media_type_backplane; + break; + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + case IXGBE_DEV_ID_82598EB_XF_LR: + case IXGBE_DEV_ID_82598EB_SFP_LOM: + media_type = ixgbe_media_type_fiber; + break; + case IXGBE_DEV_ID_82598EB_CX4: + case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: + media_type = ixgbe_media_type_cx4; + break; + case IXGBE_DEV_ID_82598AT: + case IXGBE_DEV_ID_82598AT2: + media_type = ixgbe_media_type_copper; + break; + default: + media_type = ixgbe_media_type_unknown; + break; + } +out: + return media_type; +} + +/** + * ixgbe_fc_enable_82598 - Enable flow control + * @hw: pointer to hardware structure + * + * Enable flow control according to the current settings. + **/ +s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 fctrl_reg; + u32 rmcs_reg; + u32 reg; + u32 fcrtl, fcrth; + u32 link_speed = 0; + int i; + bool link_up; + + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + + /* + * On 82598 having Rx FC on causes resets while doing 1G + * so if it's on turn it off once we know link_speed. For + * more details see 82598 Specification update. + */ + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) { + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + hw->fc.requested_mode = ixgbe_fc_tx_pause; + break; + case ixgbe_fc_rx_pause: + hw->fc.requested_mode = ixgbe_fc_none; + break; + default: + /* no change */ + break; + } + } + + /* Negotiate the fc mode to use */ + ixgbe_fc_autoneg(hw); + + /* Disable any previous flow control settings */ + fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE); + + rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X); + + /* + * The possible values of fc.current_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.current_mode) { + case ixgbe_fc_none: + /* + * Flow control is disabled by software override or autoneg. + * The code below will actually disable it in the HW. + */ + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + fctrl_reg |= IXGBE_FCTRL_RFCE; + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; + break; + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + fctrl_reg |= IXGBE_FCTRL_RFCE; + rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + /* Set 802.3x based flow control settings. */ + fctrl_reg |= IXGBE_FCTRL_DPF; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg); + IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg); + + /* Set up and enable Rx high/low water mark thresholds, enable XON. */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; + fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl); + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth); + } else { + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0); + } + + } + + /* Configure pause time (2 TCs per register) */ + reg = hw->fc.pause_time * 0x00010001; + for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); + +out: + return ret_val; +} + +/** + * ixgbe_start_mac_link_82598 - Configures MAC link settings + * @hw: pointer to hardware structure + * + * Configures link settings based on values in the ixgbe_hw struct. + * Restarts the link. Performs autonegotiation if needed. + **/ +static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete) +{ + u32 autoc_reg; + u32 links_reg; + u32 i; + s32 status = 0; + + /* Restart link */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_AN || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { + links_reg = 0; /* Just in case Autoneg time = 0 */ + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autonegotiation did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + + return status; +} + +/** + * ixgbe_validate_link_ready - Function looks for phy link + * @hw: pointer to hardware structure + * + * Function indicates success when phy link is available. If phy is not ready + * within 5 seconds of MAC indicating link, the function returns error. + **/ +static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) +{ + u32 timeout; + u16 an_reg; + + if (hw->device_id != IXGBE_DEV_ID_82598AT2) + return 0; + + for (timeout = 0; + timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg); + + if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) && + (an_reg & IXGBE_MII_AUTONEG_LINK_UP)) + break; + + msleep(100); + } + + if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { + hw_dbg(hw, "Link was indicated but link is down\n"); + return IXGBE_ERR_LINK_SETUP; + } + + return 0; +} + +/** + * ixgbe_check_mac_link_82598 - Get link/speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true is link is up, false otherwise + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Reads the links register to determine if link is up and the current speed + **/ +static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *link_up, + bool link_up_wait_to_complete) +{ + u32 links_reg; + u32 i; + u16 link_reg, adapt_comp_reg; + + /* + * SERDES PHY requires us to read link status from undocumented + * register 0xC79F. Bit 0 set indicates link is up/ready; clear + * indicates link down. OxC00C is read to check that the XAUI lanes + * are active. Bit 0 clear indicates active; set indicates inactive. + */ + if (hw->phy.type == ixgbe_phy_nl) { + hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); + hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); + hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV, + &adapt_comp_reg); + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if ((link_reg & 1) && + ((adapt_comp_reg & 1) == 0)) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + hw->phy.ops.read_reg(hw, 0xC79F, + IXGBE_TWINAX_DEV, + &link_reg); + hw->phy.ops.read_reg(hw, 0xC00C, + IXGBE_TWINAX_DEV, + &adapt_comp_reg); + } + } else { + if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0)) + *link_up = true; + else + *link_up = false; + } + + if (*link_up == false) + goto out; + } + + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if (links_reg & IXGBE_LINKS_UP) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + } + } else { + if (links_reg & IXGBE_LINKS_UP) + *link_up = true; + else + *link_up = false; + } + + if (links_reg & IXGBE_LINKS_SPEED) + *speed = IXGBE_LINK_SPEED_10GB_FULL; + else + *speed = IXGBE_LINK_SPEED_1GB_FULL; + + if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) && + (ixgbe_validate_link_ready(hw) != 0)) + *link_up = false; + +out: + return 0; +} + +/** + * ixgbe_setup_mac_link_82598 - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; + u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc = curr_autoc; + u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; + + /* Check to see if speed passed in is supported. */ + ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); + speed &= link_capabilities; + + if (speed == IXGBE_LINK_SPEED_UNKNOWN) + status = IXGBE_ERR_LINK_SETUP; + + /* Set KX4/KX support according to speed requested */ + else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { + autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK; + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + autoc |= IXGBE_AUTOC_KX4_SUPP; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + autoc |= IXGBE_AUTOC_KX_SUPP; + if (autoc != curr_autoc) + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); + } + + if (status == 0) { + /* + * Setup and restart the link based on the new values in + * ixgbe_hw This will write the AUTOC register based on the new + * stored values + */ + status = ixgbe_start_mac_link_82598(hw, + autoneg_wait_to_complete); + } + + return status; +} + + +/** + * ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true if waiting is needed to complete + * + * Sets the link speed in the AUTOC register in the MAC and restarts link. + **/ +static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status; + + /* Setup the PHY according to input speed */ + status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); + /* Set up MAC */ + ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete); + + return status; +} + +/** + * ixgbe_reset_hw_82598 - Performs hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks and + * clears all interrupts, performing a PHY reset, and performing a link (MAC) + * reset. + **/ +static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) +{ + s32 status = 0; + s32 phy_status = 0; + u32 ctrl; + u32 gheccr; + u32 i; + u32 autoc; + u8 analog_val; + + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* + * Power up the Atlas Tx lanes if they are currently powered down. + * Atlas Tx lanes are powered down for MAC loopback tests, but + * they are not automatically restored on reset. + */ + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val); + if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) { + /* Enable Tx Atlas so packets can be transmitted again */ + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, + analog_val); + } + + /* Reset PHY */ + if (hw->phy.reset_disable == false) { + /* PHY ops must be identified and initialized prior to reset */ + + /* Init PHY and function pointers, perform SFP setup */ + phy_status = hw->phy.ops.init(hw); + if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + goto mac_reset_top; + + hw->phy.ops.reset(hw); + } + +mac_reset_top: + /* + * Issue global reset to the MAC. This needs to be a SW reset. + * If link reset is used, it might reset the MAC when mng is using it + */ + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST)) + break; + } + if (ctrl & IXGBE_CTRL_RST) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + + msleep(50); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR); + gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6)); + IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr); + + /* + * Store the original AUTOC value if it has not been + * stored off yet. Otherwise restore the stored original + * AUTOC value since the reset operation sets back to deaults. + */ + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + if (hw->mac.orig_link_settings_stored == false) { + hw->mac.orig_autoc = autoc; + hw->mac.orig_link_settings_stored = true; + } else if (autoc != hw->mac.orig_autoc) { + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc); + } + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table + */ + hw->mac.ops.init_rx_addrs(hw); + +reset_hw_out: + if (phy_status != 0) + status = phy_status; + + return status; +} + +/** + * ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq set index + **/ +s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + rar_high &= ~IXGBE_RAH_VIND_MASK; + rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK); + IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); + return 0; +} + +/** + * ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq clear index (not used in 82598, but elsewhere) + **/ +static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + if (rar_high & IXGBE_RAH_VIND_MASK) { + rar_high &= ~IXGBE_RAH_VIND_MASK; + IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); + } + + return 0; +} + +/** + * ixgbe_set_vfta_82598 - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFTA + * @vlan_on: boolean flag to turn on/off VLAN in VFTA + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on) +{ + u32 regindex; + u32 bitindex; + u32 bits; + u32 vftabyte; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* Determine 32-bit word position in array */ + regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ + + /* Determine the location of the (VMD) queue index */ + vftabyte = ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */ + bitindex = (vlan & 0x7) << 2; /* lower 3 bits indicate nibble */ + + /* Set the nibble for VMD queue index */ + bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex)); + bits &= (~(0x0F << bitindex)); + bits |= (vind << bitindex); + IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits); + + /* Determine the location of the bit for this VLAN id */ + bitindex = vlan & 0x1F; /* lower five bits */ + + bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); + if (vlan_on) + /* Turn on this VLAN id */ + bits |= (1 << bitindex); + else + /* Turn off this VLAN id */ + bits &= ~(1 << bitindex); + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits); + + return 0; +} + +/** + * ixgbe_clear_vfta_82598 - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw) +{ + u32 offset; + u32 vlanbyte; + + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); + + for (vlanbyte = 0; vlanbyte < 4; vlanbyte++) + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset), + 0); + + return 0; +} + +/** + * ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs read operation to Atlas analog register specified. + **/ +s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + u32 atlas_ctl; + + IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, + IXGBE_ATLASCTL_WRITE_CMD | (reg << 8)); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); + *val = (u8)atlas_ctl; + + return 0; +} + +/** + * ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register + * @hw: pointer to hardware structure + * @reg: atlas register to write + * @val: value to write + * + * Performs write operation to Atlas analog register specified. + **/ +s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + u32 atlas_ctl; + + atlas_ctl = (reg << 8) | val; + IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + + return 0; +} + +/** + * ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface. + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs 8 byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) +{ + s32 status = 0; + u16 sfp_addr = 0; + u16 sfp_data = 0; + u16 sfp_stat = 0; + u32 i; + + if (hw->phy.type == ixgbe_phy_nl) { + /* + * NetLogic phy SDA/SCL registers are at addresses 0xC30A to + * 0xC30D. These registers are used to talk to the SFP+ + * module's EEPROM through the SDA/SCL (I2C) interface. + */ + sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset; + sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK); + hw->phy.ops.write_reg(hw, + IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + sfp_addr); + + /* Poll status */ + for (i = 0; i < 100; i++) { + hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &sfp_stat); + sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK; + if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS) + break; + msleep(10); + } + + if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { + hw_dbg(hw, "EEPROM read did not pass.\n"); + status = IXGBE_ERR_SFP_NOT_PRESENT; + goto out; + } + + /* Read data */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data); + + *eeprom_data = (u8)(sfp_data >> 8); + } else { + status = IXGBE_ERR_PHY; + goto out; + } + +out: + return status; +} + +/** + * ixgbe_get_supported_physical_layer_82598 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u16 ext_ability = 0; + + hw->phy.ops.identify(hw); + + /* Copper PHY must be checked before AUTOC LMS to determine correct + * physical layer because 10GBase-T PHYs use LMS = KX4/KX */ + switch (hw->phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_cu_unknown: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + goto out; + default: + break; + } + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_AN: + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + if (pma_pmd_1g == IXGBE_AUTOC_1G_KX) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX; + else + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX; + break; + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; + else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + else /* XAUI */ + physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + break; + case IXGBE_AUTOC_LMS_KX4_AN: + case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: + if (autoc & IXGBE_AUTOC_KX_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + break; + default: + break; + } + + if (hw->phy.type == ixgbe_phy_nl) { + hw->phy.ops.identify_sfp(hw); + + switch (hw->phy.sfp_type) { + case ixgbe_sfp_type_da_cu: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case ixgbe_sfp_type_sr: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + break; + case ixgbe_sfp_type_lr: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + break; + default: + physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + break; + } + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + break; + case IXGBE_DEV_ID_82598EB_XF_LR: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + break; + default: + break; + } + +out: + return physical_layer; +} + +/** + * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple + * port devices. + * @hw: pointer to the HW structure + * + * Calls common function and corrects issue with some single port devices + * that enable LAN1 but not LAN0. + **/ +void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_bus_info *bus = &hw->bus; + u16 pci_gen = 0; + u16 pci_ctrl2 = 0; + + ixgbe_set_lan_id_multi_port_pcie(hw); + + /* check if LAN0 is disabled */ + hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen); + if ((pci_gen != 0) && (pci_gen != 0xFFFF)) { + + hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2); + + /* if LAN0 is completely disabled force function to 0 */ + if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) { + + bus->func = 0; + } + } +} + +/** + * ixgbe_set_rxpba_82598 - Initialize RX packet buffer + * @hw: pointer to hardware structure + * @num_pb: number of packet buffers to allocate + * @headroom: reserve n KB of headroom + * @strategy: packet buffer allocation strategy + **/ +static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, + u32 headroom, int strategy) +{ + u32 rxpktsize = IXGBE_RXPBSIZE_64KB; + u8 i = 0; + + if (!num_pb) + return; + + /* Setup Rx packet buffer sizes */ + switch (strategy) { + case PBA_STRATEGY_WEIGHTED: + /* Setup the first four at 80KB */ + rxpktsize = IXGBE_RXPBSIZE_80KB; + for (; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + /* Setup the last four at 48KB...don't re-init i */ + rxpktsize = IXGBE_RXPBSIZE_48KB; + /* Fall Through */ + case PBA_STRATEGY_EQUAL: + default: + /* Divide the remaining Rx packet buffer evenly among the TCs */ + for (; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + break; + } + + /* Setup Tx packet buffer sizes */ + for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB); + + return; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h new file mode 100644 index 00000000..c6abb020 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h @@ -0,0 +1,44 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_82598_H_ +#define _IXGBE_82598_H_ + +u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw); +s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw); +s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on); +s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data); +u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw); +s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw); +void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw); +void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw); +#endif /* _IXGBE_82598_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c new file mode 100644 index 00000000..017dfe16 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -0,0 +1,2313 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_type.h" +#include "ixgbe_82599.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); +static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, + u16 offset, u16 *data); +static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); + +void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + + /* enable the laser control functions for SFP+ fiber */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) { + mac->ops.disable_tx_laser = + &ixgbe_disable_tx_laser_multispeed_fiber; + mac->ops.enable_tx_laser = + &ixgbe_enable_tx_laser_multispeed_fiber; + mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber; + + } else { + mac->ops.disable_tx_laser = NULL; + mac->ops.enable_tx_laser = NULL; + mac->ops.flap_tx_laser = NULL; + } + + if (hw->phy.multispeed_fiber) { + /* Set up dual speed SFP+ support */ + mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber; + } else { + if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) && + (hw->phy.smart_speed == ixgbe_smart_speed_auto || + hw->phy.smart_speed == ixgbe_smart_speed_on) && + !ixgbe_verify_lesm_fw_enabled_82599(hw)) { + mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed; + } else { + mac->ops.setup_link = &ixgbe_setup_mac_link_82599; + } + } +} + +/** + * ixgbe_init_phy_ops_82599 - PHY/SFP specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY/SFP type was + * not known. Perform the SFP init if necessary. + * + **/ +s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u32 esdp; + + if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) { + /* Store flag indicating I2C bus access control unit. */ + hw->phy.qsfp_shared_i2c_bus = TRUE; + + /* Initialize access to QSFP+ I2C bus */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0_DIR; + esdp &= ~IXGBE_ESDP_SDP1_DIR; + esdp &= ~IXGBE_ESDP_SDP0; + esdp &= ~IXGBE_ESDP_SDP0_NATIVE; + esdp &= ~IXGBE_ESDP_SDP1_NATIVE; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599; + phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599; + } + /* Identify the PHY or SFP module */ + ret_val = phy->ops.identify(hw); + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto init_phy_ops_out; + + /* Setup function pointers based on detected SFP module and speeds */ + ixgbe_init_mac_link_ops_82599(hw); + if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) + hw->phy.ops.reset = NULL; + + /* If copper media, overwrite with copper function pointers */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { + mac->ops.setup_link = &ixgbe_setup_copper_link_82599; + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + } + + /* Set necessary function pointers based on phy type */ + switch (hw->phy.type) { + case ixgbe_phy_tn: + phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; + phy->ops.check_link = &ixgbe_check_phy_link_tnx; + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_tnx; + break; + default: + break; + } +init_phy_ops_out: + return ret_val; +} + +s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 reg_anlp1 = 0; + u32 i = 0; + u16 list_offset, data_offset, data_value; + + if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) { + ixgbe_init_mac_link_ops_82599(hw); + + hw->phy.ops.reset = NULL; + + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, + &data_offset); + if (ret_val != 0) + goto setup_sfp_out; + + /* PHY config will finish before releasing the semaphore */ + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val != 0) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto setup_sfp_out; + } + + hw->eeprom.ops.read(hw, ++data_offset, &data_value); + while (data_value != 0xffff) { + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value); + IXGBE_WRITE_FLUSH(hw); + hw->eeprom.ops.read(hw, ++data_offset, &data_value); + } + + /* Release the semaphore */ + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + /* Delay obtaining semaphore again to allow FW access */ + msleep(hw->eeprom.semaphore_delay); + + /* Now restart DSP by setting Restart_AN and clearing LMS */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw, + IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) | + IXGBE_AUTOC_AN_RESTART)); + + /* Wait for AN to leave state 0 */ + for (i = 0; i < 10; i++) { + msleep(4); + reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1); + if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK) + break; + } + if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) { + hw_dbg(hw, "sfp module setup not complete\n"); + ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + goto setup_sfp_out; + } + + /* Restart DSP by setting Restart_AN and return to SFI mode */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw, + IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL | + IXGBE_AUTOC_AN_RESTART)); + } + +setup_sfp_out: + return ret_val; +} + +/** + * ixgbe_init_ops_82599 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for 82599. + * Does not touch the hardware. + **/ + +s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val; + + ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + /* PHY */ + phy->ops.identify = &ixgbe_identify_phy_82599; + phy->ops.init = &ixgbe_init_phy_ops_82599; + + /* MAC */ + mac->ops.reset_hw = &ixgbe_reset_hw_82599; + mac->ops.get_media_type = &ixgbe_get_media_type_82599; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_82599; + mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; + mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; + mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599; + mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599; + mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599; + mac->ops.start_hw = &ixgbe_start_hw_82599; + mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; + mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; + mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; + mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; + mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; + mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; + mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; + mac->rar_highwater = 1; + mac->ops.set_vfta = &ixgbe_set_vfta_generic; + mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; + mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; + mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; + mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599; + mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; + mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; + + /* Link */ + mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599; + mac->ops.check_link = &ixgbe_check_mac_link_generic; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; + ixgbe_init_mac_link_ops_82599(hw); + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 128; + mac->rx_pb_size = 512; + mac->max_tx_queues = 128; + mac->max_rx_queues = 128; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & + IXGBE_FWSM_MODE_MASK) ? true : false; + + //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; + + /* EEPROM */ + eeprom->ops.read = &ixgbe_read_eeprom_82599; + eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; + + mac->ops.get_thermal_sensor_data = + &ixgbe_get_thermal_sensor_data_generic; + mac->ops.init_thermal_sensor_thresh = + &ixgbe_init_thermal_sensor_thresh_generic; + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_82599 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @negotiation: true when autoneg or autotry is enabled + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *negotiation) +{ + s32 status = 0; + u32 autoc = 0; + + /* Check if 1G SFP module. */ + if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + goto out; + } + + /* + * Determine link capabilities based on the stored value of AUTOC, + * which represents EEPROM defaults. If AUTOC value has not + * been stored, use the current register values. + */ + if (hw->mac.orig_link_settings_stored) + autoc = hw->mac.orig_autoc; + else + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_1G_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_10G_SERIAL: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_KX4_KX_KR: + case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (autoc & IXGBE_AUTOC_KR_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII: + *speed = IXGBE_LINK_SPEED_100_FULL; + if (autoc & IXGBE_AUTOC_KR_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_SGMII_1G_100M: + *speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL; + *negotiation = false; + break; + + default: + status = IXGBE_ERR_LINK_SETUP; + goto out; + break; + } + + if (hw->phy.multispeed_fiber) { + *speed |= IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + } + +out: + return status; +} + +/** + * ixgbe_get_media_type_82599 - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw) +{ + enum ixgbe_media_type media_type; + + /* Detect if there is a copper PHY attached. */ + switch (hw->phy.type) { + case ixgbe_phy_cu_unknown: + case ixgbe_phy_tn: + media_type = ixgbe_media_type_copper; + goto out; + default: + break; + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_KX4: + case IXGBE_DEV_ID_82599_KX4_MEZZ: + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + case IXGBE_DEV_ID_82599_KR: + case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: + case IXGBE_DEV_ID_82599_XAUI_LOM: + /* Default device ID is mezzanine card KX/KX4 */ + media_type = ixgbe_media_type_backplane; + break; + case IXGBE_DEV_ID_82599_SFP: + case IXGBE_DEV_ID_82599_SFP_FCOE: + case IXGBE_DEV_ID_82599_SFP_EM: + case IXGBE_DEV_ID_82599_SFP_SF2: + case IXGBE_DEV_ID_82599EN_SFP: + media_type = ixgbe_media_type_fiber; + break; + case IXGBE_DEV_ID_82599_CX4: + media_type = ixgbe_media_type_cx4; + break; + case IXGBE_DEV_ID_82599_T3_LOM: + media_type = ixgbe_media_type_copper; + break; + case IXGBE_DEV_ID_82599_LS: + media_type = ixgbe_media_type_fiber_lco; + break; + case IXGBE_DEV_ID_82599_QSFP_SF_QP: + media_type = ixgbe_media_type_fiber_qsfp; + break; + default: + media_type = ixgbe_media_type_unknown; + break; + } +out: + return media_type; +} + +/** + * ixgbe_start_mac_link_82599 - Setup MAC link settings + * @hw: pointer to hardware structure + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Configures link settings based on values in the ixgbe_hw struct. + * Restarts the link. Performs autonegotiation if needed. + **/ +s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete) +{ + u32 autoc_reg; + u32 links_reg = 0; + u32 i; + s32 status = 0; + + /* Restart link */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autoneg did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + + return status; +} + +/** + * ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser + * @hw: pointer to hardware structure + * + * The base drivers may require better control over SFP+ module + * PHY states. This includes selectively shutting down the Tx + * laser on the PHY, effectively halting physical link. + **/ +void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + udelay(100); +} + +/** + * ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser + * @hw: pointer to hardware structure + * + * The base drivers may require better control over SFP+ module + * PHY states. This includes selectively turning on the Tx + * laser on the PHY, effectively starting physical link. + **/ +void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* Enable tx laser; allow 100ms to light up */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(100); +} + +/** + * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support, + * it sets autotry_restart to true to indicate that we need to + * initiate a new autotry session with the link partner. To do + * so, we set the speed then disable and re-enable the tx laser, to + * alert the link partner that it also needs to restart autotry on its + * end. This is consistent with true clause 37 autoneg, which also + * involves a loss of signal. + **/ +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + if (hw->mac.autotry_restart) { + ixgbe_disable_tx_laser_multispeed_fiber(hw); + ixgbe_enable_tx_laser_multispeed_fiber(hw); + hw->mac.autotry_restart = false; + } +} + +/** + * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; + ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN; + u32 speedcnt = 0; + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + u32 i = 0; + bool link_up = false; + bool negotiation; + + /* Mask off requested but non-supported speeds */ + status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation); + if (status != 0) + return status; + + speed &= link_speed; + + /* + * Try each speed one by one, highest priority first. We do this in + * software because 10gb fiber doesn't support speed autonegotiation. + */ + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + speedcnt++; + highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL; + + /* If we already have link at this speed, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) + goto out; + + /* Set the module link speed */ + esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + + /* Allow module to change analog characteristics (1G->10G) */ + msleep(40); + + status = ixgbe_setup_mac_link_82599(hw, + IXGBE_LINK_SPEED_10GB_FULL, + autoneg, + autoneg_wait_to_complete); + if (status != 0) + return status; + + /* Flap the tx laser if it has not already been done */ + ixgbe_flap_tx_laser(hw); + + /* + * Wait for the controller to acquire link. Per IEEE 802.3ap, + * Section 73.10.2, we may have to wait up to 500ms if KR is + * attempted. 82599 uses the same timing for 10g SFI. + */ + for (i = 0; i < 5; i++) { + /* Wait for the link partner to also set speed */ + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, + &link_up, false); + if (status != 0) + return status; + + if (link_up) + goto out; + } + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + speedcnt++; + if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN) + highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL; + + /* If we already have link at this speed, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) + goto out; + + /* Set the module link speed */ + esdp_reg &= ~IXGBE_ESDP_SDP5; + esdp_reg |= IXGBE_ESDP_SDP5_DIR; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + + /* Allow module to change analog characteristics (10G->1G) */ + msleep(40); + + status = ixgbe_setup_mac_link_82599(hw, + IXGBE_LINK_SPEED_1GB_FULL, + autoneg, + autoneg_wait_to_complete); + if (status != 0) + return status; + + /* Flap the tx laser if it has not already been done */ + ixgbe_flap_tx_laser(hw); + + /* Wait for the link partner to also set speed */ + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if (link_up) + goto out; + } + + /* + * We didn't get link. Configure back to the highest speed we tried, + * (if there was more than one). We call ourselves back with just the + * single highest speed that the user requested. + */ + if (speedcnt > 1) + status = ixgbe_setup_mac_link_multispeed_fiber(hw, + highest_link_speed, autoneg, autoneg_wait_to_complete); + +out: + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + return status; +} + +/** + * ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Implements the Intel SmartSpeed algorithm. + **/ +s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; + s32 i, j; + bool link_up = false; + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (speed & IXGBE_LINK_SPEED_100_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + + /* + * Implement Intel SmartSpeed algorithm. SmartSpeed will reduce the + * autoneg advertisement if link is unable to be established at the + * highest negotiated rate. This can sometimes happen due to integrity + * issues with the physical media connection. + */ + + /* First, try to get link with full advertisement */ + hw->phy.smart_speed_active = false; + for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) { + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + if (status != 0) + goto out; + + /* + * Wait for the controller to acquire link. Per IEEE 802.3ap, + * Section 73.10.2, we may have to wait up to 500ms if KR is + * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per + * Table 9 in the AN MAS. + */ + for (i = 0; i < 5; i++) { + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, + false); + if (status != 0) + goto out; + + if (link_up) + goto out; + } + } + + /* + * We didn't get link. If we advertised KR plus one of KX4/KX + * (or BX4/BX), then disable KR and try again. + */ + if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) || + ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0)) + goto out; + + /* Turn SmartSpeed on to disable KR support */ + hw->phy.smart_speed_active = true; + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + if (status != 0) + goto out; + + /* + * Wait for the controller to acquire link. 600ms will allow for + * the AN link_fail_inhibit_timer as well for multiple cycles of + * parallel detect, both 10g and 1g. This allows for the maximum + * connect attempts as defined in the AN MAS table 73-7. + */ + for (i = 0; i < 6; i++) { + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + goto out; + + if (link_up) + goto out; + } + + /* We didn't get link. Turn SmartSpeed back off. */ + hw->phy.smart_speed_active = false; + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + +out: + if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL)) + hw_dbg(hw, "Smartspeed has downgraded the link speed " + "from the maximum advertised\n"); + return status; +} + +/** + * ixgbe_setup_mac_link_82599 - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + u32 start_autoc = autoc; + u32 orig_autoc = 0; + u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; + u32 links_reg = 0; + u32 i; + ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; + + /* Check to see if speed passed in is supported. */ + status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); + if (status != 0) + goto out; + + speed &= link_capabilities; + + if (speed == IXGBE_LINK_SPEED_UNKNOWN) { + status = IXGBE_ERR_LINK_SETUP; + goto out; + } + + /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ + if (hw->mac.orig_link_settings_stored) + orig_autoc = hw->mac.orig_autoc; + else + orig_autoc = autoc; + + if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + /* Set KX4/KX/KR support according to speed requested */ + autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) + autoc |= IXGBE_AUTOC_KX4_SUPP; + if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && + (hw->phy.smart_speed_active == false)) + autoc |= IXGBE_AUTOC_KR_SUPP; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + autoc |= IXGBE_AUTOC_KX_SUPP; + } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && + (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN || + link_mode == IXGBE_AUTOC_LMS_1G_AN)) { + /* Switch from 1G SFI to 10G SFI if requested */ + if ((speed == IXGBE_LINK_SPEED_10GB_FULL) && + (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) { + autoc &= ~IXGBE_AUTOC_LMS_MASK; + autoc |= IXGBE_AUTOC_LMS_10G_SERIAL; + } + } else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) && + (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) { + /* Switch from 10G SFI to 1G SFI if requested */ + if ((speed == IXGBE_LINK_SPEED_1GB_FULL) && + (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) { + autoc &= ~IXGBE_AUTOC_LMS_MASK; + if (autoneg) + autoc |= IXGBE_AUTOC_LMS_1G_AN; + else + autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN; + } + } + + if (autoc != start_autoc) { + /* Restart link */ + autoc |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = + IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = + IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autoneg did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + } + +out: + return status; +} + +/** + * ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true if waiting is needed to complete + * + * Restarts link on PHY and MAC based on settings passed in. + **/ +static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status; + + /* Setup the PHY according to input speed */ + status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); + /* Set up MAC */ + ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete); + + return status; +} + +/** + * ixgbe_reset_hw_82599 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, perform a PHY reset, and perform a link (MAC) + * reset. + **/ +s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) +{ +// ixgbe_link_speed link_speed; + s32 status = 0; +// u32 ctrl, i, autoc, autoc2; +// bool link_up = false; + +#if 0 + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* flush pending Tx transactions */ + ixgbe_clear_tx_pending(hw); + + /* PHY ops must be identified and initialized prior to reset */ + + /* Identify PHY and related function pointers */ + status = hw->phy.ops.init(hw); + + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + + /* Setup SFP module if there is one present. */ + if (hw->phy.sfp_setup_needed) { + status = hw->mac.ops.setup_sfp(hw); + hw->phy.sfp_setup_needed = false; + } + + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + + /* Reset PHY */ + if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL) + hw->phy.ops.reset(hw); + +mac_reset_top: + /* + * Issue global reset to the MAC. Needs to be SW reset if link is up. + * If link reset is used when link is up, it might reset the PHY when + * mng is using it. If link is down or the flag to force full link + * reset is set, then perform link reset. + */ + ctrl = IXGBE_CTRL_LNK_RST; + if (!hw->force_full_reset) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + if (link_up) + ctrl = IXGBE_CTRL_RST; + } + + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + + msleep(50); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* + * Store the original AUTOC/AUTOC2 values if they have not been + * stored off yet. Otherwise restore the stored original + * values since the reset operation sets back to defaults. + */ + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + if (hw->mac.orig_link_settings_stored == false) { + hw->mac.orig_autoc = autoc; + hw->mac.orig_autoc2 = autoc2; + hw->mac.orig_link_settings_stored = true; + } else { + if (autoc != hw->mac.orig_autoc) + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc | + IXGBE_AUTOC_AN_RESTART)); + + if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) != + (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) { + autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK; + autoc2 |= (hw->mac.orig_autoc2 & + IXGBE_AUTOC2_UPPER_MASK); + IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2); + } + } +#endif + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to 128, + * since we modify this value when programming the SAN MAC address. + */ + hw->mac.num_rar_entries = 128; + hw->mac.ops.init_rx_addrs(hw); + + /* Store the permanent SAN mac address */ + hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); + + /* Add the SAN MAC address to the RAR only if it's a valid address */ + if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { + hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* Save the SAN MAC RAR index */ + hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + + /* Reserve the last RAR for the SAN MAC address */ + hw->mac.num_rar_entries--; + } + + /* Store the alternative WWNN/WWPN prefix */ + hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, + &hw->mac.wwpn_prefix); + +//reset_hw_out: + return status; +} + +/** + * ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables. + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) +{ + int i; + u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL); + fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE; + + /* + * Before starting reinitialization process, + * FDIRCMD.CMD must be zero. + */ + for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) { + if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + IXGBE_FDIRCMD_CMD_MASK)) + break; + udelay(10); + } + if (i >= IXGBE_FDIRCMD_CMD_POLL) { + hw_dbg(hw, "Flow Director previous command isn't complete, " + "aborting table re-initialization.\n"); + return IXGBE_ERR_FDIR_REINIT_FAILED; + } + + IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0); + IXGBE_WRITE_FLUSH(hw); + /* + * 82599 adapters flow director init flow cannot be restarted, + * Workaround 82599 silicon errata by performing the following steps + * before re-writing the FDIRCTRL control register with the same value. + * - write 1 to bit 8 of FDIRCMD register & + * - write 0 to bit 8 of FDIRCMD register + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) | + IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + ~IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + /* + * Clear FDIR Hash register to clear any leftover hashes + * waiting to be programmed. + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00); + IXGBE_WRITE_FLUSH(hw); + + IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll init-done after we write FDIRCTRL register */ + for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { + if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & + IXGBE_FDIRCTRL_INIT_DONE) + break; + udelay(10); + } + if (i >= IXGBE_FDIR_INIT_DONE_POLL) { + hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); + return IXGBE_ERR_FDIR_REINIT_FAILED; + } + + /* Clear FDIR statistics registers (read to clear) */ + IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT); + IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT); + IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); + IXGBE_READ_REG(hw, IXGBE_FDIRMISS); + IXGBE_READ_REG(hw, IXGBE_FDIRLEN); + + return 0; +} + +/** + * ixgbe_fdir_enable_82599 - Initialize Flow Director control registers + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register + **/ +static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + int i; + + /* Prime the keys for hashing */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY); + IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY); + + /* + * Poll init-done after we write the register. Estimated times: + * 10G: PBALLOC = 11b, timing is 60us + * 1G: PBALLOC = 11b, timing is 600us + * 100M: PBALLOC = 11b, timing is 6ms + * + * Multiple these timings by 4 if under full Rx load + * + * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for + * 1 msec per poll time. If we're at line rate and drop to 100M, then + * this might not finish in our poll time, but we can live with that + * for now. + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); + IXGBE_WRITE_FLUSH(hw); + for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { + if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & + IXGBE_FDIRCTRL_INIT_DONE) + break; + msleep(1); + } + + if (i >= IXGBE_FDIR_INIT_DONE_POLL) + hw_dbg(hw, "Flow Director poll time exceeded!\n"); +} + +/** + * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register, initially + * contains just the value of the Rx packet buffer allocation + **/ +s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + /* + * Continue setup of fdirctrl register bits: + * Move the flexible bytes to use the ethertype - shift 6 words + * Set the maximum length per hash bucket to 0xA filters + * Send interrupt when 64 filters are left + */ + fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | + (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | + (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); + + /* write hashes and fdirctrl register, poll for completion */ + ixgbe_fdir_enable_82599(hw, fdirctrl); + + return 0; +} + +/** + * ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register, initially + * contains just the value of the Rx packet buffer allocation + **/ +s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + /* + * Continue setup of fdirctrl register bits: + * Turn perfect match filtering on + * Report hash in RSS field of Rx wb descriptor + * Initialize the drop queue + * Move the flexible bytes to use the ethertype - shift 6 words + * Set the maximum length per hash bucket to 0xA filters + * Send interrupt when 64 (0x4 * 16) filters are left + */ + fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH | + IXGBE_FDIRCTRL_REPORT_STATUS | + (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) | + (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | + (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | + (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); + + /* write hashes and fdirctrl register, poll for completion */ + ixgbe_fdir_enable_82599(hw, fdirctrl); + + return 0; +} + +/* + * These defines allow us to quickly generate all of the necessary instructions + * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION + * for values 0 through 15 + */ +#define IXGBE_ATR_COMMON_HASH_KEY \ + (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY) +#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \ +do { \ + u32 n = (_n); \ + if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \ + common_hash ^= lo_hash_dword >> n; \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + bucket_hash ^= lo_hash_dword >> n; \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \ + sig_hash ^= lo_hash_dword << (16 - n); \ + if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \ + common_hash ^= hi_hash_dword >> n; \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + bucket_hash ^= hi_hash_dword >> n; \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \ + sig_hash ^= hi_hash_dword << (16 - n); \ +} while (0); + +/** + * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash + * @stream: input bitstream to compute the hash on + * + * This function is almost identical to the function above but contains + * several optomizations such as unwinding all of the loops, letting the + * compiler work out all of the conditional ifs since the keys are static + * defines, and computing two keys at once since the hashed dword stream + * will be the same for both keys. + **/ +u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common) +{ + u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; + u32 sig_hash = 0, bucket_hash = 0, common_hash = 0; + + /* record the flow_vm_vlan bits as they are a key part to the hash */ + flow_vm_vlan = IXGBE_NTOHL(input.dword); + + /* generate common hash dword */ + hi_hash_dword = IXGBE_NTOHL(common.dword); + + /* low dword is word swapped version of common */ + lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); + + /* apply flow ID/VM pool/VLAN ID bits to hash words */ + hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); + + /* Process bits 0 and 16 */ + IXGBE_COMPUTE_SIG_HASH_ITERATION(0); + + /* + * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to + * delay this because bit 0 of the stream should not be processed + * so we do not add the vlan until after bit 0 was processed + */ + lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); + + /* Process remaining 30 bit of the key */ + IXGBE_COMPUTE_SIG_HASH_ITERATION(1); + IXGBE_COMPUTE_SIG_HASH_ITERATION(2); + IXGBE_COMPUTE_SIG_HASH_ITERATION(3); + IXGBE_COMPUTE_SIG_HASH_ITERATION(4); + IXGBE_COMPUTE_SIG_HASH_ITERATION(5); + IXGBE_COMPUTE_SIG_HASH_ITERATION(6); + IXGBE_COMPUTE_SIG_HASH_ITERATION(7); + IXGBE_COMPUTE_SIG_HASH_ITERATION(8); + IXGBE_COMPUTE_SIG_HASH_ITERATION(9); + IXGBE_COMPUTE_SIG_HASH_ITERATION(10); + IXGBE_COMPUTE_SIG_HASH_ITERATION(11); + IXGBE_COMPUTE_SIG_HASH_ITERATION(12); + IXGBE_COMPUTE_SIG_HASH_ITERATION(13); + IXGBE_COMPUTE_SIG_HASH_ITERATION(14); + IXGBE_COMPUTE_SIG_HASH_ITERATION(15); + + /* combine common_hash result with signature and bucket hashes */ + bucket_hash ^= common_hash; + bucket_hash &= IXGBE_ATR_HASH_MASK; + + sig_hash ^= common_hash << 16; + sig_hash &= IXGBE_ATR_HASH_MASK << 16; + + /* return completed signature hash */ + return sig_hash ^ bucket_hash; +} + +/** + * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter + * @hw: pointer to hardware structure + * @input: unique input dword + * @common: compressed common input dword + * @queue: queue index to direct traffic to + **/ +s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common, + u8 queue) +{ + u64 fdirhashcmd; + u32 fdircmd; + + /* + * Get the flow_type in order to program FDIRCMD properly + * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 + */ + switch (input.formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_TCPV4: + case IXGBE_ATR_FLOW_TYPE_UDPV4: + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + case IXGBE_ATR_FLOW_TYPE_TCPV6: + case IXGBE_ATR_FLOW_TYPE_UDPV6: + case IXGBE_ATR_FLOW_TYPE_SCTPV6: + break; + default: + hw_dbg(hw, " Error on flow type input\n"); + return IXGBE_ERR_CONFIG; + } + + /* configure FDIRCMD register */ + fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | + IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; + fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; + fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; + + /* + * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits + * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. + */ + fdirhashcmd = (u64)fdircmd << 32; + fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common); + IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); + + hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd); + + return 0; +} + +#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \ +do { \ + u32 n = (_n); \ + if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + bucket_hash ^= lo_hash_dword >> n; \ + if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + bucket_hash ^= hi_hash_dword >> n; \ +} while (0); + +/** + * ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash + * @atr_input: input bitstream to compute the hash on + * @input_mask: mask for the input bitstream + * + * This function serves two main purposes. First it applys the input_mask + * to the atr_input resulting in a cleaned up atr_input data stream. + * Secondly it computes the hash and stores it in the bkt_hash field at + * the end of the input byte stream. This way it will be available for + * future use without needing to recompute the hash. + **/ +void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, + union ixgbe_atr_input *input_mask) +{ + + u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; + u32 bucket_hash = 0; + + /* Apply masks to input data */ + input->dword_stream[0] &= input_mask->dword_stream[0]; + input->dword_stream[1] &= input_mask->dword_stream[1]; + input->dword_stream[2] &= input_mask->dword_stream[2]; + input->dword_stream[3] &= input_mask->dword_stream[3]; + input->dword_stream[4] &= input_mask->dword_stream[4]; + input->dword_stream[5] &= input_mask->dword_stream[5]; + input->dword_stream[6] &= input_mask->dword_stream[6]; + input->dword_stream[7] &= input_mask->dword_stream[7]; + input->dword_stream[8] &= input_mask->dword_stream[8]; + input->dword_stream[9] &= input_mask->dword_stream[9]; + input->dword_stream[10] &= input_mask->dword_stream[10]; + + /* record the flow_vm_vlan bits as they are a key part to the hash */ + flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]); + + /* generate common hash dword */ + hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^ + input->dword_stream[2] ^ + input->dword_stream[3] ^ + input->dword_stream[4] ^ + input->dword_stream[5] ^ + input->dword_stream[6] ^ + input->dword_stream[7] ^ + input->dword_stream[8] ^ + input->dword_stream[9] ^ + input->dword_stream[10]); + + /* low dword is word swapped version of common */ + lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); + + /* apply flow ID/VM pool/VLAN ID bits to hash words */ + hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); + + /* Process bits 0 and 16 */ + IXGBE_COMPUTE_BKT_HASH_ITERATION(0); + + /* + * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to + * delay this because bit 0 of the stream should not be processed + * so we do not add the vlan until after bit 0 was processed + */ + lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); + + /* Process remaining 30 bit of the key */ + IXGBE_COMPUTE_BKT_HASH_ITERATION(1); + IXGBE_COMPUTE_BKT_HASH_ITERATION(2); + IXGBE_COMPUTE_BKT_HASH_ITERATION(3); + IXGBE_COMPUTE_BKT_HASH_ITERATION(4); + IXGBE_COMPUTE_BKT_HASH_ITERATION(5); + IXGBE_COMPUTE_BKT_HASH_ITERATION(6); + IXGBE_COMPUTE_BKT_HASH_ITERATION(7); + IXGBE_COMPUTE_BKT_HASH_ITERATION(8); + IXGBE_COMPUTE_BKT_HASH_ITERATION(9); + IXGBE_COMPUTE_BKT_HASH_ITERATION(10); + IXGBE_COMPUTE_BKT_HASH_ITERATION(11); + IXGBE_COMPUTE_BKT_HASH_ITERATION(12); + IXGBE_COMPUTE_BKT_HASH_ITERATION(13); + IXGBE_COMPUTE_BKT_HASH_ITERATION(14); + IXGBE_COMPUTE_BKT_HASH_ITERATION(15); + + /* + * Limit hash to 13 bits since max bucket count is 8K. + * Store result at the end of the input stream. + */ + input->formatted.bkt_hash = bucket_hash & 0x1FFF; +} + +/** + * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks + * @input_mask: mask to be bit swapped + * + * The source and destination port masks for flow director are bit swapped + * in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc. In order to + * generate a correctly swapped value we need to bit swap the mask and that + * is what is accomplished by this function. + **/ +static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask) +{ + u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port); + mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT; + mask |= IXGBE_NTOHS(input_mask->formatted.src_port); + mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1); + mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2); + mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4); + return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8); +} + +/* + * These two macros are meant to address the fact that we have registers + * that are either all or in part big-endian. As a result on big-endian + * systems we will end up byte swapping the value to little-endian before + * it is byte swapped again and written to the hardware in the original + * big-endian format. + */ +#define IXGBE_STORE_AS_BE32(_value) \ + (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \ + (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24)) + +#define IXGBE_WRITE_REG_BE32(a, reg, value) \ + IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value))) + +#define IXGBE_STORE_AS_BE16(_value) \ + IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8)) + +s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input_mask) +{ + /* mask IPv6 since it is currently not supported */ + u32 fdirm = IXGBE_FDIRM_DIPv6; + u32 fdirtcpm; + + /* + * Program the relevant mask registers. If src/dst_port or src/dst_addr + * are zero, then assume a full mask for that field. Also assume that + * a VLAN of 0 is unspecified, so mask that out as well. L4type + * cannot be masked out in this implementation. + * + * This also assumes IPv4 only. IPv6 masking isn't supported at this + * point in time. + */ + + /* verify bucket hash is cleared on hash generation */ + if (input_mask->formatted.bkt_hash) + hw_dbg(hw, " bucket hash should always be 0 in mask\n"); + + /* Program FDIRM and verify partial masks */ + switch (input_mask->formatted.vm_pool & 0x7F) { + case 0x0: + fdirm |= IXGBE_FDIRM_POOL; + case 0x7F: + break; + default: + hw_dbg(hw, " Error on vm pool mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { + case 0x0: + fdirm |= IXGBE_FDIRM_L4P; + if (input_mask->formatted.dst_port || + input_mask->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port mask\n"); + return IXGBE_ERR_CONFIG; + } + case IXGBE_ATR_L4TYPE_MASK: + break; + default: + hw_dbg(hw, " Error on flow type mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) { + case 0x0000: + /* mask VLAN ID, fall through to mask VLAN priority */ + fdirm |= IXGBE_FDIRM_VLANID; + case 0x0FFF: + /* mask VLAN priority */ + fdirm |= IXGBE_FDIRM_VLANP; + break; + case 0xE000: + /* mask VLAN ID only, fall through */ + fdirm |= IXGBE_FDIRM_VLANID; + case 0xEFFF: + /* no VLAN fields masked */ + break; + default: + hw_dbg(hw, " Error on VLAN mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (input_mask->formatted.flex_bytes & 0xFFFF) { + case 0x0000: + /* Mask Flex Bytes, fall through */ + fdirm |= IXGBE_FDIRM_FLEX; + case 0xFFFF: + break; + default: + hw_dbg(hw, " Error on flexible byte mask\n"); + return IXGBE_ERR_CONFIG; + } + + /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); + + /* store the TCP/UDP port masks, bit reversed from port layout */ + fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask); + + /* write both the same so that UDP and TCP use the same mask */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm); + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm); + + /* store source and destination IP masks (big-enian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, + ~input_mask->formatted.src_ip[0]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, + ~input_mask->formatted.dst_ip[0]); + + return 0; +} + +s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id, u8 queue) +{ + u32 fdirport, fdirvlan, fdirhash, fdircmd; + + /* currently IPv6 is not supported, must be programmed with 0 */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0), + input->formatted.src_ip[0]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1), + input->formatted.src_ip[1]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2), + input->formatted.src_ip[2]); + + /* record the source address (big-endian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]); + + /* record the first 32 bits of the destination address (big-endian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]); + + /* record source and destination port (little-endian)*/ + fdirport = IXGBE_NTOHS(input->formatted.dst_port); + fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT; + fdirport |= IXGBE_NTOHS(input->formatted.src_port); + IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport); + + /* record vlan (little-endian) and flex_bytes(big-endian) */ + fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes); + fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT; + fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id); + IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan); + + /* configure FDIRHASH register */ + fdirhash = input->formatted.bkt_hash; + fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + + /* + * flush all previous writes to make certain registers are + * programmed prior to issuing the command + */ + IXGBE_WRITE_FLUSH(hw); + + /* configure FDIRCMD register */ + fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | + IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; + if (queue == IXGBE_FDIR_DROP_QUEUE) + fdircmd |= IXGBE_FDIRCMD_DROP; + fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; + fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; + fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT; + + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd); + + return 0; +} + +s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id) +{ + u32 fdirhash; + u32 fdircmd = 0; + u32 retry_count; + s32 err = 0; + + /* configure FDIRHASH register */ + fdirhash = input->formatted.bkt_hash; + fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + + /* flush hash to HW */ + IXGBE_WRITE_FLUSH(hw); + + /* Query if filter is present */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT); + + for (retry_count = 10; retry_count; retry_count--) { + /* allow 10us for query to process */ + udelay(10); + /* verify query completed successfully */ + fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD); + if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK)) + break; + } + + if (!retry_count) + err = IXGBE_ERR_FDIR_REINIT_FAILED; + + /* if filter exists in hardware then remove it */ + if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) { + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + IXGBE_FDIRCMD_CMD_REMOVE_FLOW); + } + + return err; +} + +/** + * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter + * @hw: pointer to hardware structure + * @input: input bitstream + * @input_mask: mask for the input bitstream + * @soft_id: software index for the filters + * @queue: queue index to direct traffic to + * + * Note that the caller to this function must lock before calling, since the + * hardware writes must be protected from one another. + **/ +s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + union ixgbe_atr_input *input_mask, + u16 soft_id, u8 queue) +{ + s32 err = IXGBE_ERR_CONFIG; + + /* + * Check flow_type formatting, and bail out before we touch the hardware + * if there's a configuration issue + */ + switch (input->formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_IPV4: + input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK; + if (input->formatted.dst_port || input->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port\n"); + return IXGBE_ERR_CONFIG; + } + break; + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + if (input->formatted.dst_port || input->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port\n"); + return IXGBE_ERR_CONFIG; + } + case IXGBE_ATR_FLOW_TYPE_TCPV4: + case IXGBE_ATR_FLOW_TYPE_UDPV4: + input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + break; + default: + hw_dbg(hw, " Error on flow type input\n"); + return err; + } + + /* program input mask into the HW */ + err = ixgbe_fdir_set_input_mask_82599(hw, input_mask); + if (err) + return err; + + /* apply mask and compute/store hash */ + ixgbe_atr_compute_perfect_hash_82599(input, input_mask); + + /* program filters to filter memory */ + return ixgbe_fdir_write_perfect_filter_82599(hw, input, + soft_id, queue); +} + +/** + * ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs read operation to Omer analog register specified. + **/ +s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + u32 core_ctl; + + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD | + (reg << 8)); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL); + *val = (u8)core_ctl; + + return 0; +} + +/** + * ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register + * @hw: pointer to hardware structure + * @reg: atlas register to write + * @val: value to write + * + * Performs write operation to Omer analog register specified. + **/ +s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + u32 core_ctl; + + core_ctl = (reg << 8) | val; + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + + return 0; +} + +/** + * ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + if (ret_val != 0) + goto out; + + ret_val = ixgbe_start_hw_gen2(hw); + if (ret_val != 0) + goto out; + + /* We need to run link autotry after the driver loads */ + hw->mac.autotry_restart = true; + + if (ret_val == 0) + ret_val = ixgbe_verify_fw_version_82599(hw); +out: + return ret_val; +} + +/** + * ixgbe_identify_phy_82599 - Get physical layer module + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + * If PHY already detected, maintains current PHY type in hw struct, + * otherwise executes the PHY detection routine. + **/ +s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + + /* Detect PHY if not unknown - returns success if already detected. */ + status = ixgbe_identify_phy_generic(hw); + if (status != 0) { + /* 82599 10GBASE-T requires an external PHY */ + if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) + goto out; + else + status = ixgbe_identify_module_generic(hw); + } + + /* Set PHY type none if no PHY detected */ + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.type = ixgbe_phy_none; + status = 0; + } + + /* Return error if SFP module has been detected but is not supported */ + if (hw->phy.type == ixgbe_phy_sfp_unsupported) + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + +out: + return status; +} + +/** + * ixgbe_get_supported_physical_layer_82599 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; + u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u16 ext_ability = 0; + u8 comp_codes_10g = 0; + u8 comp_codes_1g = 0; + + hw->phy.ops.identify(hw); + + switch (hw->phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_cu_unknown: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + goto out; + default: + break; + } + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_AN: + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) { + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX | + IXGBE_PHYSICAL_LAYER_1000BASE_BX; + goto out; + } else + /* SFI mode so read SFP module */ + goto sfp_check; + break; + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; + else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI; + goto out; + break; + case IXGBE_AUTOC_LMS_10G_SERIAL: + if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) { + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR; + goto out; + } else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) + goto sfp_check; + break; + case IXGBE_AUTOC_LMS_KX4_KX_KR: + case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: + if (autoc & IXGBE_AUTOC_KX_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + if (autoc & IXGBE_AUTOC_KR_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR; + goto out; + break; + default: + goto out; + break; + } + +sfp_check: + /* SFP check must be done last since DA modules are sometimes used to + * test KR mode - we need to id KR mode correctly before SFP module. + * Call identify_sfp because the pluggable module may have changed */ + hw->phy.ops.identify_sfp(hw); + if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) + goto out; + + switch (hw->phy.type) { + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case ixgbe_phy_sfp_ftl_active: + case ixgbe_phy_sfp_active_unknown: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA; + break; + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g); + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g); + if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T; + else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX; + break; + default: + break; + } + +out: + return physical_layer; +} + +/** + * ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599 + * @hw: pointer to hardware structure + * @regval: register value to write to RXCTRL + * + * Enables the Rx DMA unit for 82599 + **/ +s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) +{ + + /* + * Workaround for 82599 silicon errata when enabling the Rx datapath. + * If traffic is incoming before we enable the Rx unit, it could hang + * the Rx DMA unit. Therefore, make sure the security engine is + * completely disabled prior to enabling the Rx unit. + */ + + hw->mac.ops.disable_sec_rx_path(hw); + + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); + + hw->mac.ops.enable_sec_rx_path(hw); + + return 0; +} + +/** + * ixgbe_verify_fw_version_82599 - verify fw version for 82599 + * @hw: pointer to hardware structure + * + * Verifies that installed the firmware version is 0.6 or higher + * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. + * + * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or + * if the FW version is not supported. + **/ +static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM_VERSION; + u16 fw_offset, fw_ptp_cfg_offset; + u16 fw_version = 0; + + /* firmware check is only necessary for SFI devices */ + if (hw->phy.media_type != ixgbe_media_type_fiber) { + status = 0; + goto fw_version_out; + } + + /* get the offset to the Firmware Module block */ + hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + + if ((fw_offset == 0) || (fw_offset == 0xFFFF)) + goto fw_version_out; + + /* get the offset to the Pass Through Patch Configuration block */ + hw->eeprom.ops.read(hw, (fw_offset + + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), + &fw_ptp_cfg_offset); + + if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF)) + goto fw_version_out; + + /* get the firmware version */ + hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + + IXGBE_FW_PATCH_VERSION_4), &fw_version); + + if (fw_version > 0x5) + status = 0; + +fw_version_out: + return status; +} + +/** + * ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state. + * @hw: pointer to hardware structure + * + * Returns true if the LESM FW module is present and enabled. Otherwise + * returns false. Smart Speed must be disabled if LESM FW module is enabled. + **/ +bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw) +{ + bool lesm_enabled = false; + u16 fw_offset, fw_lesm_param_offset, fw_lesm_state; + s32 status; + + /* get the offset to the Firmware Module block */ + status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + + if ((status != 0) || + (fw_offset == 0) || (fw_offset == 0xFFFF)) + goto out; + + /* get the offset to the LESM Parameters block */ + status = hw->eeprom.ops.read(hw, (fw_offset + + IXGBE_FW_LESM_PARAMETERS_PTR), + &fw_lesm_param_offset); + + if ((status != 0) || + (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF)) + goto out; + + /* get the lesm state word */ + status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset + + IXGBE_FW_LESM_STATE_1), + &fw_lesm_state); + + if ((status == 0) && + (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED)) + lesm_enabled = true; + +out: + return lesm_enabled; +} + +/** + * ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using + * fastest available method + * + * @hw: pointer to hardware structure + * @offset: offset of word in EEPROM to read + * @words: number of words + * @data: word(s) read from the EEPROM + * + * Retrieves 16 bit word(s) read from EEPROM + **/ +static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val = IXGBE_ERR_CONFIG; + + /* + * If EEPROM is detected and can be addressed using 14 bits, + * use EERD otherwise use bit bang + */ + if ((eeprom->type == ixgbe_eeprom_spi) && + (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR)) + ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words, + data); + else + ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset, + words, + data); + + return ret_val; +} + +/** + * ixgbe_read_eeprom_82599 - Read EEPROM word using + * fastest available method + * + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM + **/ +static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, + u16 offset, u16 *data) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val = IXGBE_ERR_CONFIG; + + /* + * If EEPROM is detected and can be addressed using 14 bits, + * use EERD otherwise use bit bang + */ + if ((eeprom->type == ixgbe_eeprom_spi) && + (offset <= IXGBE_EERD_MAX_ADDR)) + ret_val = ixgbe_read_eerd_generic(hw, offset, data); + else + ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data); + + return ret_val; +} + +/** + * ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + u32 esdp; + s32 status; + s32 timeout = 200; + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Acquire I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + while (timeout) { + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + if (esdp & IXGBE_ESDP_SDP1) + break; + + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource," + " acquiring I2C bus timeout.\n"); + status = IXGBE_ERR_I2C; + goto release_i2c_access; + } + } + + status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data); + +release_i2c_access: + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Release I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp &= ~IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + } + + return status; +} + +/** + * ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + u32 esdp; + s32 status; + s32 timeout = 200; + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Acquire I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + while (timeout) { + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + if (esdp & IXGBE_ESDP_SDP1) + break; + + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource," + " acquiring I2C bus timeout.\n"); + status = IXGBE_ERR_I2C; + goto release_i2c_access; + } + } + + status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data); + +release_i2c_access: + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Release I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp &= ~IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + } + + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h new file mode 100644 index 00000000..02be92ab --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h @@ -0,0 +1,58 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_82599_H_ +#define _IXGBE_82599_H_ + +s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *autoneg); +enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw); +void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete); +s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool autoneg_wait_to_complete); +s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw); +void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw); +s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw); +s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw); +s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw); +s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw); +u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval); +bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw); +#endif /* _IXGBE_82599_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c new file mode 100644 index 00000000..ef7ce629 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -0,0 +1,1157 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_api.h" +#include "ixgbe_common.h" + +/** + * ixgbe_init_shared_code - Initialize the shared code + * @hw: pointer to hardware structure + * + * This will assign function pointers and assign the MAC type and PHY code. + * Does not touch the hardware. This function must be called prior to any + * other function in the shared code. The ixgbe_hw structure should be + * memset to 0 prior to calling this function. The following fields in + * hw structure should be filled in prior to calling this function: + * hw_addr, back, device_id, vendor_id, subsystem_device_id, + * subsystem_vendor_id, and revision_id + **/ +s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) +{ + s32 status; + + /* + * Set the mac type + */ + ixgbe_set_mac_type(hw); + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + status = ixgbe_init_ops_82598(hw); + break; + case ixgbe_mac_82599EB: + status = ixgbe_init_ops_82599(hw); + break; + case ixgbe_mac_X540: + status = ixgbe_init_ops_X540(hw); + break; + default: + status = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + break; + } + + return status; +} + +/** + * ixgbe_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the mac type of the adapter based on the + * vendor ID and device ID stored in the hw structure. + **/ +s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) { + switch (hw->device_id) { + case IXGBE_DEV_ID_82598: + case IXGBE_DEV_ID_82598_BX: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AT: + case IXGBE_DEV_ID_82598AT2: + case IXGBE_DEV_ID_82598EB_CX4: + case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + case IXGBE_DEV_ID_82598EB_XF_LR: + case IXGBE_DEV_ID_82598EB_SFP_LOM: + hw->mac.type = ixgbe_mac_82598EB; + break; + case IXGBE_DEV_ID_82599_KX4: + case IXGBE_DEV_ID_82599_KX4_MEZZ: + case IXGBE_DEV_ID_82599_XAUI_LOM: + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + case IXGBE_DEV_ID_82599_KR: + case IXGBE_DEV_ID_82599_SFP: + case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: + case IXGBE_DEV_ID_82599_SFP_FCOE: + case IXGBE_DEV_ID_82599_SFP_EM: + case IXGBE_DEV_ID_82599_SFP_SF2: + case IXGBE_DEV_ID_82599_QSFP_SF_QP: + case IXGBE_DEV_ID_82599EN_SFP: + case IXGBE_DEV_ID_82599_CX4: + case IXGBE_DEV_ID_82599_LS: + case IXGBE_DEV_ID_82599_T3_LOM: + hw->mac.type = ixgbe_mac_82599EB; + break; + case IXGBE_DEV_ID_X540T: + hw->mac.type = ixgbe_mac_X540; + break; + default: + ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + break; + } + } else { + ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + } + + hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n", + hw->mac.type, ret_val); + return ret_val; +} + +/** + * ixgbe_init_hw - Initialize the hardware + * @hw: pointer to hardware structure + * + * Initialize the hardware by resetting and then starting the hardware + **/ +s32 ixgbe_init_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_reset_hw - Performs a hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks and + * clears all interrupts, performs a PHY reset, and performs a MAC reset + **/ +s32 ixgbe_reset_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_start_hw - Prepares hardware for Rx/Tx + * @hw: pointer to hardware structure + * + * Starts the hardware by filling the bus info structure and media type, + * clears all on chip counters, initializes receive address registers, + * multicast table, VLAN filter table, calls routine to setup link and + * flow control settings, and leaves transmit and receive units disabled + * and uninitialized. + **/ +s32 ixgbe_start_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_hw_cntrs - Clear hardware counters + * @hw: pointer to hardware structure + * + * Clears all hardware statistics counters by reading them from the hardware + * Statistics counters are clear on read. + **/ +s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_media_type - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw), + ixgbe_media_type_unknown); +} + +/** + * ixgbe_get_mac_addr - Get MAC address + * @hw: pointer to hardware structure + * @mac_addr: Adapter MAC address + * + * Reads the adapter's MAC address from the first Receive Address Register + * (RAR0) A reset of the adapter must have been performed prior to calling + * this function in order for the MAC address to have been loaded from the + * EEPROM into RAR0 + **/ +s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr, + (hw, mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_san_mac_addr - Get SAN MAC address + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Reads the SAN MAC address from the EEPROM, if it's available. This is + * per-port, so set_lan_id() must be called before reading the addresses. + **/ +s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr, + (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_san_mac_addr - Write a SAN MAC address + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Writes A SAN MAC address to the EEPROM. + **/ +s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr, + (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_device_caps - Get additional device capabilities + * @hw: pointer to hardware structure + * @device_caps: the EEPROM word for device capabilities + * + * Reads the extra device capabilities from the EEPROM + **/ +s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_device_caps, + (hw, device_caps), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM + * @hw: pointer to hardware structure + * @wwnn_prefix: the alternative WWNN prefix + * @wwpn_prefix: the alternative WWPN prefix + * + * This function will read the EEPROM from the alternative SAN MAC address + * block to check the support for the alternative WWNN/WWPN prefix support. + **/ +s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix, + (hw, wwnn_prefix, wwpn_prefix), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_fcoe_boot_status - Get FCOE boot status from EEPROM + * @hw: pointer to hardware structure + * @bs: the fcoe boot status + * + * This function will read the FCOE boot status from the iSCSI FCOE block + **/ +s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status, + (hw, bs), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_bus_info - Set PCI bus info + * @hw: pointer to hardware structure + * + * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure + **/ +s32 ixgbe_get_bus_info(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_num_of_tx_queues - Get Tx queues + * @hw: pointer to hardware structure + * + * Returns the number of transmit queues for the given adapter. + **/ +u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw) +{ + return hw->mac.max_tx_queues; +} + +/** + * ixgbe_get_num_of_rx_queues - Get Rx queues + * @hw: pointer to hardware structure + * + * Returns the number of receive queues for the given adapter. + **/ +u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw) +{ + return hw->mac.max_rx_queues; +} + +/** + * ixgbe_stop_adapter - Disable Rx/Tx units + * @hw: pointer to hardware structure + * + * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, + * disables transmit and receive units. The adapter_stopped flag is used by + * the shared code and drivers to determine if the adapter is in a stopped + * state and should not touch the hardware. + **/ +s32 ixgbe_stop_adapter(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_pba_string - Reads part number string from EEPROM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the EEPROM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the EEPROM. + **/ +s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size) +{ + return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size); +} + +/** + * ixgbe_identify_phy - Get PHY type + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + **/ +s32 ixgbe_identify_phy(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw), + IXGBE_NOT_IMPLEMENTED); + } + + return status; +} + +/** + * ixgbe_reset_phy - Perform a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + if (ixgbe_identify_phy(hw) != 0) + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw), + IXGBE_NOT_IMPLEMENTED); + } + return status; +} + +/** + * ixgbe_get_phy_firmware_version - + * @hw: pointer to hardware structure + * @firmware_version: pointer to firmware version + **/ +s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version) +{ + s32 status = 0; + + status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version, + (hw, firmware_version), + IXGBE_NOT_IMPLEMENTED); + return status; +} + +/** + * ixgbe_read_phy_reg - Read PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit address of PHY register to read + * @phy_data: Pointer to read data from PHY register + * + * Reads a value from a specified PHY register + **/ +s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 *phy_data) +{ + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + + return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr, + device_type, phy_data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_phy_reg - Write PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit PHY register to write + * @phy_data: Data to write to the PHY register + * + * Writes a value to specified PHY register + **/ +s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 phy_data) +{ + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + + return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr, + device_type, phy_data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_setup_phy_link - Restart PHY autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_check_phy_link - Determine link and speed status + * @hw: pointer to hardware structure + * + * Reads a PHY register to determine if link is up and the current speed for + * the PHY. + **/ +s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed, + link_up), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_setup_phy_link_speed - Set auto advertise + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * + * Sets the auto advertised capabilities + **/ +s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed, + autoneg, autoneg_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_check_link - Get link and speed status + * @hw: pointer to hardware structure + * + * Reads the links register to determine if link is up and the current speed + **/ +s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed, + link_up, link_up_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_tx_laser - Disable Tx laser + * @hw: pointer to hardware structure + * + * If the driver needs to disable the laser on SFI optics. + **/ +void ixgbe_disable_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.disable_tx_laser) + hw->mac.ops.disable_tx_laser(hw); +} + +/** + * ixgbe_enable_tx_laser - Enable Tx laser + * @hw: pointer to hardware structure + * + * If the driver needs to enable the laser on SFI optics. + **/ +void ixgbe_enable_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.enable_tx_laser) + hw->mac.ops.enable_tx_laser(hw); +} + +/** + * ixgbe_flap_tx_laser - flap Tx laser to start autotry process + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support then + * flap the tx laser to alert the link partner to start autotry + * process on its end. + **/ +void ixgbe_flap_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.flap_tx_laser) + hw->mac.ops.flap_tx_laser(hw); +} + +/** + * ixgbe_setup_link - Set link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * + * Configures link settings. Restarts the link. + * Performs autonegotiation if needed. + **/ +s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed, + autoneg, autoneg_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_link_capabilities - Returns link capabilities + * @hw: pointer to hardware structure + * + * Determines the link capabilities of the current configuration. + **/ +s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *autoneg) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw, + speed, autoneg), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_led_on - Turn on LEDs + * @hw: pointer to hardware structure + * @index: led number to turn on + * + * Turns on the software controllable LEDs. + **/ +s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_led_off - Turn off LEDs + * @hw: pointer to hardware structure + * @index: led number to turn off + * + * Turns off the software controllable LEDs. + **/ +s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_blink_led_start - Blink LEDs + * @hw: pointer to hardware structure + * @index: led number to blink + * + * Blink LED based on index. + **/ +s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_blink_led_stop - Stop blinking LEDs + * @hw: pointer to hardware structure + * + * Stop blinking LED based on index. + **/ +s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_eeprom_params - Initialize EEPROM parameters + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw), + IXGBE_NOT_IMPLEMENTED); +} + + +/** + * ixgbe_write_eeprom - Write word to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word to be written to the EEPROM + * + * Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not + * called after this function, the EEPROM will most likely contain an + * invalid checksum. + **/ +s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_eeprom_buffer - Write word(s) to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word(s) to be written to the EEPROM + * @words: number of words + * + * Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not + * called after this function, the EEPROM will most likely contain an + * invalid checksum. + **/ +s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer, + (hw, offset, words, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_eeprom - Read word from EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit value from EEPROM + * + * Reads 16 bit value from EEPROM + **/ +s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_eeprom_buffer - Read word(s) from EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit word(s) from EEPROM + * @words: number of words + * + * Reads 16 bit word(s) from EEPROM + **/ +s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer, + (hw, offset, words, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_validate_eeprom_checksum - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum + **/ +s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum, + (hw, checksum_val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_eeprom_update_checksum - Updates the EEPROM checksum + * @hw: pointer to hardware structure + **/ +s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_insert_mac_addr - Find a RAR for this mac address + * @hw: pointer to hardware structure + * @addr: Address to put into receive address register + * @vmdq: VMDq pool to assign + * + * Puts an ethernet address into a receive address register, or + * finds the rar that it is aleady in; adds to the pool list + **/ +s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr, + (hw, addr, vmdq), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_rar - Set Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * @addr: Address to put into receive address register + * @vmdq: VMDq "set" + * @enable_addr: set flag that address is active + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq, + enable_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_rar - Clear Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vmdq - Associate a VMDq index with a receive address + * @hw: pointer to hardware structure + * @rar: receive address register index to associate with VMDq index + * @vmdq: VMDq set or pool index + **/ +s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq), + IXGBE_NOT_IMPLEMENTED); + +} + +/** + * ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address + * @hw: pointer to hardware structure + * @vmdq: VMDq default pool index + **/ +s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac, + (hw, vmdq), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address + * @hw: pointer to hardware structure + * @rar: receive address register index to disassociate with VMDq index + * @vmdq: VMDq set or pool index + **/ +s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_rx_addrs - Initializes receive address filters. + * @hw: pointer to hardware structure + * + * Places the MAC address in receive address register 0 and clears the rest + * of the receive address registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + **/ +s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_num_rx_addrs - Returns the number of RAR entries. + * @hw: pointer to hardware structure + **/ +u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw) +{ + return hw->mac.num_rar_entries; +} + +/** + * ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses + * @hw: pointer to hardware structure + * @addr_list: the list of new multicast addresses + * @addr_count: number of addresses + * @func: iterator function to walk the multicast address list + * + * The given list replaces any existing list. Clears the secondary addrs from + * receive address registers. Uses unused receive address registers for the + * first secondary addresses, and falls back to promiscuous mode as needed. + **/ +s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func) +{ + return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw, + addr_list, addr_count, func), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses + * @hw: pointer to hardware structure + * @mc_addr_list: the list of new multicast addresses + * @mc_addr_count: number of addresses + * @func: iterator function to walk the multicast address list + * + * The given list replaces any existing list. Clears the MC addrs from receive + * address registers and the multicast table. Uses unused receive address + * registers for the first multicast addresses, and hashes the rest into the + * multicast table. + **/ +s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr func, + bool clear) +{ + return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw, + mc_addr_list, mc_addr_count, func, clear), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_enable_mc - Enable multicast address in RAR + * @hw: pointer to hardware structure + * + * Enables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_enable_mc(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_mc - Disable multicast address in RAR + * @hw: pointer to hardware structure + * + * Disables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_disable_mc(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_vfta - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +s32 ixgbe_clear_vfta(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vfta - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFTA + * @vlan_on: boolean flag to turn on/off VLAN in VFTA + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind, + vlan_on), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vlvf - Set VLAN Pool Filter + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, + bool *vfta_changed) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind, + vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_fc_enable - Enable flow control + * @hw: pointer to hardware structure + * + * Configures the flow control settings based on SW configuration. + **/ +s32 ixgbe_fc_enable(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_fw_drv_ver - Try to send the driver version number FW + * @hw: pointer to hardware structure + * @maj: driver major number to be sent to firmware + * @min: driver minor number to be sent to firmware + * @build: driver build number to be sent to firmware + * @ver: driver version number to be sent to firmware + **/ +s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, + u8 ver) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min, + build, ver), IXGBE_NOT_IMPLEMENTED); +} + + +/** + * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Inits the thermal sensor thresholds according to the NVM map + **/ +s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw), + IXGBE_NOT_IMPLEMENTED); +} +/** + * ixgbe_read_analog_reg8 - Reads 8 bit analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs write operation to analog register specified. + **/ +s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg, + val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_analog_reg8 - Writes 8 bit analog register + * @hw: pointer to hardware structure + * @reg: analog register to write + * @val: value to write + * + * Performs write operation to Atlas analog register specified. + **/ +s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg, + val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_uta_tables - Initializes Unicast Table Arrays. + * @hw: pointer to hardware structure + * + * Initializes the Unicast Table Arrays to zero on device load. This + * is part of the Rx init addr execution path. + **/ +s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 *data) +{ + return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset, + dev_addr, data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_i2c_byte - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface + * at a specified device address. + **/ +s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 data) +{ + return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset, + dev_addr, data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to write + * @eeprom_data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, + u8 byte_offset, u8 eeprom_data) +{ + return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom, + (hw, byte_offset, eeprom_data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data) +{ + return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom, + (hw, byte_offset, eeprom_data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_supported_physical_layer - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer, + (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN); +} + +/** + * ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics + * @hw: pointer to hardware structure + * @regval: bitfield to write to the Rx DMA register + * + * Enables the Rx DMA unit of the device. + **/ +s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma, + (hw, regval), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_sec_rx_path - Stops the receive data path + * @hw: pointer to hardware structure + * + * Stops the receive data path. + **/ +s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path, + (hw), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_enable_sec_rx_path - Enables the receive data path + * @hw: pointer to hardware structure + * + * Enables the receive data path. + **/ +s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path, + (hw), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync, + (hw, mask), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_release_swfw_semaphore - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + if (hw->mac.ops.release_swfw_sync) + hw->mac.ops.release_swfw_sync(hw, mask); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h new file mode 100644 index 00000000..a6ab30d2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h @@ -0,0 +1,168 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_API_H_ +#define _IXGBE_API_H_ + +#include "ixgbe_type.h" + +s32 ixgbe_init_shared_code(struct ixgbe_hw *hw); + +extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw); + +s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); +s32 ixgbe_init_hw(struct ixgbe_hw *hw); +s32 ixgbe_reset_hw(struct ixgbe_hw *hw); +s32 ixgbe_start_hw(struct ixgbe_hw *hw); +s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw); +enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw); +s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr); +s32 ixgbe_get_bus_info(struct ixgbe_hw *hw); +u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw); +u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw); +s32 ixgbe_stop_adapter(struct ixgbe_hw *hw); +s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size); + +s32 ixgbe_identify_phy(struct ixgbe_hw *hw); +s32 ixgbe_reset_phy(struct ixgbe_hw *hw); +s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 *phy_data); +s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 phy_data); + +s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw); +s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up); +s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +void ixgbe_disable_tx_laser(struct ixgbe_hw *hw); +void ixgbe_enable_tx_laser(struct ixgbe_hw *hw); +void ixgbe_flap_tx_laser(struct ixgbe_hw *hw); +s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool autoneg_wait_to_complete); +s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); +s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *autoneg); +s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw); +s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); + +s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val); +s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw); + +s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); +s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr); +s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq); +s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw); +u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw); +s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func); +s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr func, + bool clear); +void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq); +s32 ixgbe_enable_mc(struct ixgbe_hw *hw); +s32 ixgbe_disable_mc(struct ixgbe_hw *hw); +s32 ixgbe_clear_vfta(struct ixgbe_hw *hw); +s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, + u32 vind, bool vlan_on); +s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed); +s32 ixgbe_fc_enable(struct ixgbe_hw *hw); +s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, + u8 ver); +s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw); +s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw); +void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr); +s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, + u16 *firmware_version); +s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw); +s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data); +u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval); +s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw); +s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw); +s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw); +s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl); +s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl); +s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common, + u8 queue); +s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input_mask); +s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id, u8 queue); +s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id); +s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + union ixgbe_atr_input *mask, + u16 soft_id, + u8 queue); +void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, + union ixgbe_atr_input *mask); +u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common); +s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 *data); +s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 data); +s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data); +s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps); +s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); +s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix); +s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs); + +#endif /* _IXGBE_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c new file mode 100644 index 00000000..93659ca0 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -0,0 +1,4082 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_common.h" +#include "ixgbe_phy.h" +#include "ixgbe_api.h" + +static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw); +static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw); +static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw); +static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw); +static void ixgbe_standby_eeprom(struct ixgbe_hw *hw); +static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, + u16 count); +static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count); +static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); +static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); +static void ixgbe_release_eeprom(struct ixgbe_hw *hw); + +static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr); +static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, + u16 *san_mac_offset); +static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, + u16 offset); + +/** + * ixgbe_init_ops_generic - Inits function ptrs + * @hw: pointer to the hardware structure + * + * Initialize the function pointers. + **/ +s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + struct ixgbe_mac_info *mac = &hw->mac; + u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* EEPROM */ + eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic; + /* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */ + if (eec & IXGBE_EEC_PRES) { + eeprom->ops.read = &ixgbe_read_eerd_generic; + eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic; + } else { + eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic; + eeprom->ops.read_buffer = + &ixgbe_read_eeprom_buffer_bit_bang_generic; + } + eeprom->ops.write = &ixgbe_write_eeprom_generic; + eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic; + eeprom->ops.validate_checksum = + &ixgbe_validate_eeprom_checksum_generic; + eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic; + eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic; + + /* MAC */ + mac->ops.init_hw = &ixgbe_init_hw_generic; + mac->ops.reset_hw = NULL; + mac->ops.start_hw = &ixgbe_start_hw_generic; + mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic; + mac->ops.get_media_type = NULL; + mac->ops.get_supported_physical_layer = NULL; + mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic; + mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic; + mac->ops.stop_adapter = &ixgbe_stop_adapter_generic; + mac->ops.get_bus_info = &ixgbe_get_bus_info_generic; + mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie; + mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync; + mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync; + + /* LEDs */ + mac->ops.led_on = &ixgbe_led_on_generic; + mac->ops.led_off = &ixgbe_led_off_generic; + mac->ops.blink_led_start = &ixgbe_blink_led_start_generic; + mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_rar = &ixgbe_set_rar_generic; + mac->ops.clear_rar = &ixgbe_clear_rar_generic; + mac->ops.insert_mac_addr = NULL; + mac->ops.set_vmdq = NULL; + mac->ops.clear_vmdq = NULL; + mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic; + mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic; + mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic; + mac->ops.enable_mc = &ixgbe_enable_mc_generic; + mac->ops.disable_mc = &ixgbe_disable_mc_generic; + mac->ops.clear_vfta = NULL; + mac->ops.set_vfta = NULL; + mac->ops.set_vlvf = NULL; + mac->ops.init_uta_tables = NULL; + + /* Flow Control */ + mac->ops.fc_enable = &ixgbe_fc_enable_generic; + + /* Link */ + mac->ops.get_link_capabilities = NULL; + mac->ops.setup_link = NULL; + mac->ops.check_link = NULL; + + return 0; +} + +/** + * ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow + * control + * @hw: pointer to hardware structure + * + * There are several phys that do not support autoneg flow control. This + * function check the device id to see if the associated phy supports + * autoneg flow control. + **/ +static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) +{ + + switch (hw->device_id) { + case IXGBE_DEV_ID_X540T: + return 0; + case IXGBE_DEV_ID_82599_T3_LOM: + return 0; + default: + return IXGBE_ERR_FC_NOT_SUPPORTED; + } +} + +/** + * ixgbe_setup_fc - Set up flow control + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + **/ +static s32 ixgbe_setup_fc(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 reg = 0, reg_bp = 0; + u16 reg_cu = 0; + + /* + * Validate the requested mode. Strict IEEE mode does not allow + * ixgbe_fc_rx_pause because it will cause us to fail at UNH. + */ + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* + * 10gig parts do not have a word in the EEPROM to determine the + * default flow control setting, so we explicitly set it to full. + */ + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + /* + * Set up the 1G and 10G flow control advertisement registers so the + * HW will be able to do fc autoneg once the cable is plugged in. If + * we link at 10G, the 1G advertisement is harmless and vice versa. + */ + switch (hw->phy.media_type) { + case ixgbe_media_type_fiber: + case ixgbe_media_type_backplane: + reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC); + break; + case ixgbe_media_type_copper: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®_cu); + break; + default: + break; + } + + /* + * The possible values of fc.requested_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: + /* Flow control completely disabled by software override. */ + reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE); + if (hw->phy.media_type == ixgbe_media_type_backplane) + reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE | + IXGBE_AUTOC_ASM_PAUSE); + else if (hw->phy.media_type == ixgbe_media_type_copper) + reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE); + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + reg |= IXGBE_PCS1GANA_ASM_PAUSE; + reg &= ~IXGBE_PCS1GANA_SYM_PAUSE; + if (hw->phy.media_type == ixgbe_media_type_backplane) { + reg_bp |= IXGBE_AUTOC_ASM_PAUSE; + reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE; + } else if (hw->phy.media_type == ixgbe_media_type_copper) { + reg_cu |= IXGBE_TAF_ASM_PAUSE; + reg_cu &= ~IXGBE_TAF_SYM_PAUSE; + } + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE, as such we fall + * through to the fc_full statement. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE; + if (hw->phy.media_type == ixgbe_media_type_backplane) + reg_bp |= IXGBE_AUTOC_SYM_PAUSE | + IXGBE_AUTOC_ASM_PAUSE; + else if (hw->phy.media_type == ixgbe_media_type_copper) + reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + if (hw->mac.type != ixgbe_mac_X540) { + /* + * Enable auto-negotiation between the MAC & PHY; + * the MAC will advertise clause 37 flow control. + */ + IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg); + reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); + + /* Disable AN timeout */ + if (hw->fc.strict_ieee) + reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN; + + IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg); + hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg); + } + + /* + * AUTOC restart handles negotiation of 1G and 10G on backplane + * and copper. There is no need to set the PCS1GCTL register. + * + */ + if (hw->phy.media_type == ixgbe_media_type_backplane) { + reg_bp |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp); + } else if ((hw->phy.media_type == ixgbe_media_type_copper) && + (ixgbe_device_supports_autoneg_fc(hw) == 0)) { + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu); + } + + hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg); +out: + return ret_val; +} + +/** + * ixgbe_start_hw_generic - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware by filling the bus info structure and media type, clears + * all on chip counters, initializes receive address registers, multicast + * table, VLAN filter table, calls routine to set up link and flow control + * settings, and leaves transmit and receive units disabled and uninitialized + **/ +s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) +{ + s32 ret_val; + u32 ctrl_ext; + + /* Set the media type */ + hw->phy.media_type = hw->mac.ops.get_media_type(hw); + + /* PHY ops initialization must be done in reset_hw() */ + + /* Clear the VLAN filter table */ + hw->mac.ops.clear_vfta(hw); + + /* Clear statistics registers */ + hw->mac.ops.clear_hw_cntrs(hw); + + /* Set No Snoop Disable */ + ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + IXGBE_WRITE_FLUSH(hw); + + /* Setup flow control */ + ret_val = ixgbe_setup_fc(hw); + if (ret_val != 0) + goto out; + + /* Clear adapter stopped flag */ + hw->adapter_stopped = false; + +out: + return ret_val; +} + +/** + * ixgbe_start_hw_gen2 - Init sequence for common device family + * @hw: pointer to hw structure + * + * Performs the init sequence common to the second generation + * of 10 GbE devices. + * Devices in the second generation: + * 82599 + * X540 + **/ +s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) +{ + u32 i; + u32 regval; + + /* Clear the rate limiters */ + for (i = 0; i < hw->mac.max_tx_queues; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i); + IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0); + } + IXGBE_WRITE_FLUSH(hw); + + /* Disable relaxed ordering */ + for (i = 0; i < hw->mac.max_tx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); + regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); + } + + for (i = 0; i < hw->mac.max_rx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | + IXGBE_DCA_RXCTRL_HEAD_WRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); + } + + return 0; +} + +/** + * ixgbe_init_hw_generic - Generic hardware initialization + * @hw: pointer to hardware structure + * + * Initialize the hardware by resetting the hardware, filling the bus info + * structure and media type, clears all on chip counters, initializes receive + * address registers, multicast table, VLAN filter table, calls routine to set + * up link and flow control settings, and leaves transmit and receive units + * disabled and uninitialized + **/ +s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) +{ + s32 status; + + /* Reset the hardware */ + status = hw->mac.ops.reset_hw(hw); + + if (status == 0) { + /* Start the HW */ + status = hw->mac.ops.start_hw(hw); + } + + return status; +} + +/** + * ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters + * @hw: pointer to hardware structure + * + * Clears all hardware statistics counters by reading them from the hardware + * Statistics counters are clear on read. + **/ +s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw) +{ + u16 i = 0; + + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + IXGBE_READ_REG(hw, IXGBE_ERRBC); + IXGBE_READ_REG(hw, IXGBE_MSPDC); + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_MPC(i)); + + IXGBE_READ_REG(hw, IXGBE_MLFC); + IXGBE_READ_REG(hw, IXGBE_MRFC); + IXGBE_READ_REG(hw, IXGBE_RLEC); + IXGBE_READ_REG(hw, IXGBE_LXONTXC); + IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); + IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } else { + IXGBE_READ_REG(hw, IXGBE_LXONRXC); + IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + } + + for (i = 0; i < 8; i++) { + IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); + } else { + IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); + } + } + if (hw->mac.type >= ixgbe_mac_82599EB) + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); + IXGBE_READ_REG(hw, IXGBE_PRC64); + IXGBE_READ_REG(hw, IXGBE_PRC127); + IXGBE_READ_REG(hw, IXGBE_PRC255); + IXGBE_READ_REG(hw, IXGBE_PRC511); + IXGBE_READ_REG(hw, IXGBE_PRC1023); + IXGBE_READ_REG(hw, IXGBE_PRC1522); + IXGBE_READ_REG(hw, IXGBE_GPRC); + IXGBE_READ_REG(hw, IXGBE_BPRC); + IXGBE_READ_REG(hw, IXGBE_MPRC); + IXGBE_READ_REG(hw, IXGBE_GPTC); + IXGBE_READ_REG(hw, IXGBE_GORCL); + IXGBE_READ_REG(hw, IXGBE_GORCH); + IXGBE_READ_REG(hw, IXGBE_GOTCL); + IXGBE_READ_REG(hw, IXGBE_GOTCH); + if (hw->mac.type == ixgbe_mac_82598EB) + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + IXGBE_READ_REG(hw, IXGBE_RUC); + IXGBE_READ_REG(hw, IXGBE_RFC); + IXGBE_READ_REG(hw, IXGBE_ROC); + IXGBE_READ_REG(hw, IXGBE_RJC); + IXGBE_READ_REG(hw, IXGBE_MNGPRC); + IXGBE_READ_REG(hw, IXGBE_MNGPDC); + IXGBE_READ_REG(hw, IXGBE_MNGPTC); + IXGBE_READ_REG(hw, IXGBE_TORL); + IXGBE_READ_REG(hw, IXGBE_TORH); + IXGBE_READ_REG(hw, IXGBE_TPR); + IXGBE_READ_REG(hw, IXGBE_TPT); + IXGBE_READ_REG(hw, IXGBE_PTC64); + IXGBE_READ_REG(hw, IXGBE_PTC127); + IXGBE_READ_REG(hw, IXGBE_PTC255); + IXGBE_READ_REG(hw, IXGBE_PTC511); + IXGBE_READ_REG(hw, IXGBE_PTC1023); + IXGBE_READ_REG(hw, IXGBE_PTC1522); + IXGBE_READ_REG(hw, IXGBE_MPTC); + IXGBE_READ_REG(hw, IXGBE_BPTC); + for (i = 0; i < 16; i++) { + IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); + IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + } else { + IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + } + } + + if (hw->mac.type == ixgbe_mac_X540) { + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_LDPCECL, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_LDPCECH, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + } + + return 0; +} + +/** + * ixgbe_read_pba_string_generic - Reads part number string from EEPROM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the EEPROM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the EEPROM. + **/ +s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + s32 ret_val; + u16 data; + u16 pba_ptr; + u16 offset; + u16 length; + + if (pba_num == NULL) { + hw_dbg(hw, "PBA string buffer was null\n"); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + /* + * if data is not ptr guard the PBA must be in legacy format which + * means pba_ptr is actually our second data word for the PBA number + * and we can decode it into an ascii string + */ + if (data != IXGBE_PBANUM_PTR_GUARD) { + hw_dbg(hw, "NVM PBA number is not stored as string\n"); + + /* we will need 11 characters to store the PBA */ + if (pba_num_size < 11) { + hw_dbg(hw, "PBA string buffer too small\n"); + return IXGBE_ERR_NO_SPACE; + } + + /* extract hex string from data and pba_ptr */ + pba_num[0] = (data >> 12) & 0xF; + pba_num[1] = (data >> 8) & 0xF; + pba_num[2] = (data >> 4) & 0xF; + pba_num[3] = data & 0xF; + pba_num[4] = (pba_ptr >> 12) & 0xF; + pba_num[5] = (pba_ptr >> 8) & 0xF; + pba_num[6] = '-'; + pba_num[7] = 0; + pba_num[8] = (pba_ptr >> 4) & 0xF; + pba_num[9] = pba_ptr & 0xF; + + /* put a null character on the end of our string */ + pba_num[10] = '\0'; + + /* switch all the data but the '-' to hex char */ + for (offset = 0; offset < 10; offset++) { + if (pba_num[offset] < 0xA) + pba_num[offset] += '0'; + else if (pba_num[offset] < 0x10) + pba_num[offset] += 'A' - 0xA; + } + + return 0; + } + + ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + hw_dbg(hw, "NVM PBA number section invalid length\n"); + return IXGBE_ERR_PBA_SECTION; + } + + /* check if pba_num buffer is big enough */ + if (pba_num_size < (((u32)length * 2) - 1)) { + hw_dbg(hw, "PBA string buffer too small\n"); + return IXGBE_ERR_NO_SPACE; + } + + /* trim pba length from start of string */ + pba_ptr++; + length--; + + for (offset = 0; offset < length; offset++) { + ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + pba_num[offset * 2] = (u8)(data >> 8); + pba_num[(offset * 2) + 1] = (u8)(data & 0xFF); + } + pba_num[offset * 2] = '\0'; + + return 0; +} + +/** + * ixgbe_get_mac_addr_generic - Generic get MAC address + * @hw: pointer to hardware structure + * @mac_addr: Adapter MAC address + * + * Reads the adapter's MAC address from first Receive Address Register (RAR0) + * A reset of the adapter must be performed prior to calling this function + * in order for the MAC address to have been loaded from the EEPROM into RAR0 + **/ +s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0)); + rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0)); + + for (i = 0; i < 4; i++) + mac_addr[i] = (u8)(rar_low >> (i*8)); + + for (i = 0; i < 2; i++) + mac_addr[i+4] = (u8)(rar_high >> (i*8)); + + return 0; +} + +/** + * ixgbe_get_bus_info_generic - Generic set PCI bus info + * @hw: pointer to hardware structure + * + * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure + **/ +s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u16 link_status; + + hw->bus.type = ixgbe_bus_type_pci_express; + + /* Get the negotiated link width and speed from PCI config space */ + link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); + + switch (link_status & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + hw->bus.width = ixgbe_bus_width_pcie_x1; + break; + case IXGBE_PCI_LINK_WIDTH_2: + hw->bus.width = ixgbe_bus_width_pcie_x2; + break; + case IXGBE_PCI_LINK_WIDTH_4: + hw->bus.width = ixgbe_bus_width_pcie_x4; + break; + case IXGBE_PCI_LINK_WIDTH_8: + hw->bus.width = ixgbe_bus_width_pcie_x8; + break; + default: + hw->bus.width = ixgbe_bus_width_unknown; + break; + } + + switch (link_status & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + hw->bus.speed = ixgbe_bus_speed_2500; + break; + case IXGBE_PCI_LINK_SPEED_5000: + hw->bus.speed = ixgbe_bus_speed_5000; + break; + case IXGBE_PCI_LINK_SPEED_8000: + hw->bus.speed = ixgbe_bus_speed_8000; + break; + default: + hw->bus.speed = ixgbe_bus_speed_unknown; + break; + } + + mac->ops.set_lan_id(hw); + + return 0; +} + +/** + * ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices + * @hw: pointer to the HW structure + * + * Determines the LAN function id by reading memory-mapped registers + * and swaps the port value if requested. + **/ +void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) +{ + struct ixgbe_bus_info *bus = &hw->bus; + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_STATUS); + bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; + bus->lan_id = bus->func; + + /* check for a port swap */ + reg = IXGBE_READ_REG(hw, IXGBE_FACTPS); + if (reg & IXGBE_FACTPS_LFS) + bus->func ^= 0x1; +} + +/** + * ixgbe_stop_adapter_generic - Generic stop Tx/Rx units + * @hw: pointer to hardware structure + * + * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, + * disables transmit and receive units. The adapter_stopped flag is used by + * the shared code and drivers to determine if the adapter is in a stopped + * state and should not touch the hardware. + **/ +s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) +{ + u32 reg_val; + u16 i; + + /* + * Set the adapter_stopped flag so other driver functions stop touching + * the hardware + */ + hw->adapter_stopped = true; + + /* Disable the receive unit */ + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0); + + /* Clear interrupt mask to stop interrupts from being generated */ + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); + + /* Clear any pending interrupts, flush previous writes */ + IXGBE_READ_REG(hw, IXGBE_EICR); + + /* Disable the transmit unit. Each queue must be disabled. */ + for (i = 0; i < hw->mac.max_tx_queues; i++) + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH); + + /* Disable the receive unit by stopping each queue */ + for (i = 0; i < hw->mac.max_rx_queues; i++) { + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + reg_val &= ~IXGBE_RXDCTL_ENABLE; + reg_val |= IXGBE_RXDCTL_SWFLSH; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val); + } + + /* flush all queues disables */ + IXGBE_WRITE_FLUSH(hw); + msleep(2); + + /* + * Prevent the PCI-E bus from from hanging by disabling PCI-E master + * access and verify no pending requests + */ + return ixgbe_disable_pcie_master(hw); +} + +/** + * ixgbe_led_on_generic - Turns on the software controllable LEDs. + * @hw: pointer to hardware structure + * @index: led number to turn on + **/ +s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* To turn on the LED, set mode to ON. */ + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_led_off_generic - Turns off the software controllable LEDs. + * @hw: pointer to hardware structure + * @index: led number to turn off + **/ +s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* To turn off the LED, set mode to OFF. */ + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_init_eeprom_params_generic - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 eec; + u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { + eeprom->type = ixgbe_eeprom_none; + /* Set default semaphore delay to 10ms which is a well + * tested value */ + eeprom->semaphore_delay = 10; + /* Clear EEPROM page size, it will be initialized as needed */ + eeprom->word_page_size = 0; + + /* + * Check for EEPROM present first. + * If not present leave as none + */ + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + if (eec & IXGBE_EEC_PRES) { + eeprom->type = ixgbe_eeprom_spi; + + /* + * SPI EEPROM is assumed here. This code would need to + * change if a future EEPROM is not SPI. + */ + eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> + IXGBE_EEC_SIZE_SHIFT); + eeprom->word_size = 1 << (eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + } + + if (eec & IXGBE_EEC_ADDR_SIZE) + eeprom->address_bits = 16; + else + eeprom->address_bits = 8; + hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: " + "%d\n", eeprom->type, eeprom->word_size, + eeprom->address_bits); + } + + return 0; +} + +/** + * ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to write + * @words: number of word(s) + * @data: 16 bit word(s) to write to EEPROM + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status = 0; + u16 i, count; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset + words > hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + /* + * The EEPROM page size cannot be queried from the chip. We do lazy + * initialization. It is worth to do that when we write large buffer. + */ + if ((hw->eeprom.word_page_size == 0) && + (words > IXGBE_EEPROM_PAGE_SIZE_MAX)) + ixgbe_detect_eeprom_page_size_generic(hw, offset); + + /* + * We cannot hold synchronization semaphores for too long + * to avoid other entity starvation. However it is more efficient + * to read in bursts than synchronizing access for each word. + */ + for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { + count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? + IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i, + count, &data[i]); + + if (status != 0) + break; + } + +out: + return status; +} + +/** + * ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @words: number of word(s) + * @data: 16 bit word(s) to be written to the EEPROM + * + * If ixgbe_eeprom_update_checksum is not called after this function, the + * EEPROM will most likely contain an invalid checksum. + **/ +static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + u16 word; + u16 page_size; + u16 i; + u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI; + + /* Prepare the EEPROM for writing */ + status = ixgbe_acquire_eeprom(hw); + + if (status == 0) { + if (ixgbe_ready_eeprom(hw) != 0) { + ixgbe_release_eeprom(hw); + status = IXGBE_ERR_EEPROM; + } + } + + if (status == 0) { + for (i = 0; i < words; i++) { + ixgbe_standby_eeprom(hw); + + /* Send the WRITE ENABLE command (8 bit opcode ) */ + ixgbe_shift_out_eeprom_bits(hw, + IXGBE_EEPROM_WREN_OPCODE_SPI, + IXGBE_EEPROM_OPCODE_BITS); + + ixgbe_standby_eeprom(hw); + + /* + * Some SPI eeproms use the 8th address bit embedded + * in the opcode + */ + if ((hw->eeprom.address_bits == 8) && + ((offset + i) >= 128)) + write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; + + /* Send the Write command (8-bit opcode + addr) */ + ixgbe_shift_out_eeprom_bits(hw, write_opcode, + IXGBE_EEPROM_OPCODE_BITS); + ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), + hw->eeprom.address_bits); + + page_size = hw->eeprom.word_page_size; + + /* Send the data in burst via SPI*/ + do { + word = data[i]; + word = (word >> 8) | (word << 8); + ixgbe_shift_out_eeprom_bits(hw, word, 16); + + if (page_size == 0) + break; + + /* do not wrap around page */ + if (((offset + i) & (page_size - 1)) == + (page_size - 1)) + break; + } while (++i < words); + + ixgbe_standby_eeprom(hw); + msleep(10); + } + /* Done with writing - release the EEPROM */ + ixgbe_release_eeprom(hw); + } + + return status; +} + +/** + * ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word to be written to the EEPROM + * + * If ixgbe_eeprom_update_checksum is not called after this function, the + * EEPROM will most likely contain an invalid checksum. + **/ +s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + s32 status; + + hw->eeprom.ops.init_params(hw); + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); + +out: + return status; +} + +/** + * ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit words(s) from EEPROM + * @words: number of word(s) + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status = 0; + u16 i, count; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset + words > hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + /* + * We cannot hold synchronization semaphores for too long + * to avoid other entity starvation. However it is more efficient + * to read in bursts than synchronizing access for each word. + */ + for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { + count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? + IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i, + count, &data[i]); + + if (status != 0) + break; + } + +out: + return status; +} + +/** + * ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @words: number of word(s) + * @data: read 16 bit word(s) from EEPROM + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + u16 word_in; + u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI; + u16 i; + + /* Prepare the EEPROM for reading */ + status = ixgbe_acquire_eeprom(hw); + + if (status == 0) { + if (ixgbe_ready_eeprom(hw) != 0) { + ixgbe_release_eeprom(hw); + status = IXGBE_ERR_EEPROM; + } + } + + if (status == 0) { + for (i = 0; i < words; i++) { + ixgbe_standby_eeprom(hw); + /* + * Some SPI eeproms use the 8th address bit embedded + * in the opcode + */ + if ((hw->eeprom.address_bits == 8) && + ((offset + i) >= 128)) + read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; + + /* Send the READ command (opcode + addr) */ + ixgbe_shift_out_eeprom_bits(hw, read_opcode, + IXGBE_EEPROM_OPCODE_BITS); + ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), + hw->eeprom.address_bits); + + /* Read the data. */ + word_in = ixgbe_shift_in_eeprom_bits(hw, 16); + data[i] = (word_in >> 8) | (word_in << 8); + } + + /* End this read operation */ + ixgbe_release_eeprom(hw); + } + + return status; +} + +/** + * ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit value from EEPROM + * + * Reads 16 bit value from EEPROM through bit-bang method + **/ +s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 *data) +{ + s32 status; + + hw->eeprom.ops.init_params(hw); + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); + +out: + return status; +} + +/** + * ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of word(s) + * @data: 16 bit word(s) from the EEPROM + * + * Reads a 16 bit word(s) from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + u32 eerd; + s32 status = 0; + u32 i; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + for (i = 0; i < words; i++) { + eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) + + IXGBE_EEPROM_RW_REG_START; + + IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd); + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ); + + if (status == 0) { + data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >> + IXGBE_EEPROM_RW_REG_DATA); + } else { + hw_dbg(hw, "Eeprom read timed out\n"); + goto out; + } + } +out: + return status; +} + +/** + * ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be used as a scratch pad + * + * Discover EEPROM page size by writing marching data at given offset. + * This function is called only when we are writing a new large buffer + * at given offset so the data would be overwritten anyway. + **/ +static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, + u16 offset) +{ + u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX]; + s32 status = 0; + u16 i; + + for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++) + data[i] = i; + + hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX; + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, + IXGBE_EEPROM_PAGE_SIZE_MAX, data); + hw->eeprom.word_page_size = 0; + if (status != 0) + goto out; + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); + if (status != 0) + goto out; + + /* + * When writing in burst more than the actual page size + * EEPROM address wraps around current page. + */ + hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0]; + + hw_dbg(hw, "Detected EEPROM page size = %d words.", + hw->eeprom.word_page_size); +out: + return status; +} + +/** + * ixgbe_read_eerd_generic - Read EEPROM word using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data); +} + +/** + * ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of word(s) + * @data: word(s) write to the EEPROM + * + * Write a 16 bit word(s) to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + u32 eewr; + s32 status = 0; + u16 i; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + for (i = 0; i < words; i++) { + eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | + (data[i] << IXGBE_EEPROM_RW_REG_DATA) | + IXGBE_EEPROM_RW_REG_START; + + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); + if (status != 0) { + hw_dbg(hw, "Eeprom write EEWR timed out\n"); + goto out; + } + + IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr); + + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); + if (status != 0) { + hw_dbg(hw, "Eeprom write EEWR timed out\n"); + goto out; + } + } + +out: + return status; +} + +/** + * ixgbe_write_eewr_generic - Write EEPROM word using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @data: word write to the EEPROM + * + * Write a 16 bit word to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data); +} + +/** + * ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status + * @hw: pointer to hardware structure + * @ee_reg: EEPROM flag for polling + * + * Polls the status bit (bit 1) of the EERD or EEWR to determine when the + * read or write is done respectively. + **/ +s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) +{ + u32 i; + u32 reg; + s32 status = IXGBE_ERR_EEPROM; + + for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) { + if (ee_reg == IXGBE_NVM_POLL_READ) + reg = IXGBE_READ_REG(hw, IXGBE_EERD); + else + reg = IXGBE_READ_REG(hw, IXGBE_EEWR); + + if (reg & IXGBE_EEPROM_RW_REG_DONE) { + status = 0; + break; + } + udelay(5); + } + return status; +} + +/** + * ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang + * @hw: pointer to hardware structure + * + * Prepares EEPROM for access using bit-bang method. This function should + * be called before issuing a command to the EEPROM. + **/ +static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 eec; + u32 i; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) + != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* Request EEPROM Access */ + eec |= IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + + for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) { + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + if (eec & IXGBE_EEC_GNT) + break; + udelay(5); + } + + /* Release if grant not acquired */ + if (!(eec & IXGBE_EEC_GNT)) { + eec &= ~IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + hw_dbg(hw, "Could not acquire EEPROM grant\n"); + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + status = IXGBE_ERR_EEPROM; + } + + /* Setup EEPROM for Read/Write */ + if (status == 0) { + /* Clear CS and SK */ + eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK); + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); + } + } + return status; +} + +/** + * ixgbe_get_eeprom_semaphore - Get hardware semaphore + * @hw: pointer to hardware structure + * + * Sets the hardware semaphores so EEPROM access can occur for bit-bang method + **/ +static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM; + u32 timeout = 2000; + u32 i; + u32 swsm; + + /* Get SMBI software semaphore between device drivers first */ + for (i = 0; i < timeout; i++) { + /* + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) { + status = 0; + break; + } + udelay(50); + } + + if (i == timeout) { + hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore " + "not granted.\n"); + /* + * this release is particularly important because our attempts + * above to get the semaphore may have succeeded, and if there + * was a timeout, we should unconditionally clear the semaphore + * bits to free the driver to make progress + */ + ixgbe_release_eeprom_semaphore(hw); + + udelay(50); + /* + * one last try + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) + status = 0; + } + + /* Now get the semaphore between SW/FW through the SWESMBI bit */ + if (status == 0) { + for (i = 0; i < timeout; i++) { + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + + /* Set the SW EEPROM semaphore bit to request access */ + swsm |= IXGBE_SWSM_SWESMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + + /* + * If we set the bit successfully then we got the + * semaphore. + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (swsm & IXGBE_SWSM_SWESMBI) + break; + + udelay(50); + } + + /* + * Release semaphores and return error if SW EEPROM semaphore + * was not granted because we don't have access to the EEPROM + */ + if (i >= timeout) { + hw_dbg(hw, "SWESMBI Software EEPROM semaphore " + "not granted.\n"); + ixgbe_release_eeprom_semaphore(hw); + status = IXGBE_ERR_EEPROM; + } + } else { + hw_dbg(hw, "Software semaphore SMBI between device drivers " + "not granted.\n"); + } + + return status; +} + +/** + * ixgbe_release_eeprom_semaphore - Release hardware semaphore + * @hw: pointer to hardware structure + * + * This function clears hardware semaphore bits. + **/ +static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw) +{ + u32 swsm; + + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + + /* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */ + swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI); + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ready_eeprom - Polls for EEPROM ready + * @hw: pointer to hardware structure + **/ +static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 i; + u8 spi_stat_reg; + + /* + * Read "Status Register" repeatedly until the LSB is cleared. The + * EEPROM will signal that the command has been completed by clearing + * bit 0 of the internal status register. If it's not cleared within + * 5 milliseconds, then error out. + */ + for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) { + ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI, + IXGBE_EEPROM_OPCODE_BITS); + spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8); + if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI)) + break; + + udelay(5); + ixgbe_standby_eeprom(hw); + }; + + /* + * On some parts, SPI write time could vary from 0-20mSec on 3.3V + * devices (and only 0-5mSec on 5V devices) + */ + if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { + hw_dbg(hw, "SPI EEPROM Status error\n"); + status = IXGBE_ERR_EEPROM; + } + + return status; +} + +/** + * ixgbe_standby_eeprom - Returns EEPROM to a "standby" state + * @hw: pointer to hardware structure + **/ +static void ixgbe_standby_eeprom(struct ixgbe_hw *hw) +{ + u32 eec; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* Toggle CS to flush commands */ + eec |= IXGBE_EEC_CS; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); + eec &= ~IXGBE_EEC_CS; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM. + * @hw: pointer to hardware structure + * @data: data to send to the EEPROM + * @count: number of bits to shift out + **/ +static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, + u16 count) +{ + u32 eec; + u32 mask; + u32 i; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* + * Mask is used to shift "count" bits of "data" out to the EEPROM + * one bit at a time. Determine the starting bit based on count + */ + mask = 0x01 << (count - 1); + + for (i = 0; i < count; i++) { + /* + * A "1" is shifted out to the EEPROM by setting bit "DI" to a + * "1", and then raising and then lowering the clock (the SK + * bit controls the clock input to the EEPROM). A "0" is + * shifted out to the EEPROM by setting "DI" to "0" and then + * raising and then lowering the clock. + */ + if (data & mask) + eec |= IXGBE_EEC_DI; + else + eec &= ~IXGBE_EEC_DI; + + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + + udelay(1); + + ixgbe_raise_eeprom_clk(hw, &eec); + ixgbe_lower_eeprom_clk(hw, &eec); + + /* + * Shift mask to signify next bit of data to shift in to the + * EEPROM + */ + mask = mask >> 1; + }; + + /* We leave the "DI" bit set to "0" when we leave this routine. */ + eec &= ~IXGBE_EEC_DI; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM + * @hw: pointer to hardware structure + **/ +static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count) +{ + u32 eec; + u32 i; + u16 data = 0; + + /* + * In order to read a register from the EEPROM, we need to shift + * 'count' bits in from the EEPROM. Bits are "shifted in" by raising + * the clock input to the EEPROM (setting the SK bit), and then reading + * the value of the "DO" bit. During this "shifting in" process the + * "DI" bit should always be clear. + */ + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI); + + for (i = 0; i < count; i++) { + data = data << 1; + ixgbe_raise_eeprom_clk(hw, &eec); + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec &= ~(IXGBE_EEC_DI); + if (eec & IXGBE_EEC_DO) + data |= 1; + + ixgbe_lower_eeprom_clk(hw, &eec); + } + + return data; +} + +/** + * ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input. + * @hw: pointer to hardware structure + * @eec: EEC register's current value + **/ +static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) +{ + /* + * Raise the clock input to the EEPROM + * (setting the SK bit), then delay + */ + *eec = *eec | IXGBE_EEC_SK; + IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input. + * @hw: pointer to hardware structure + * @eecd: EECD's current value + **/ +static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) +{ + /* + * Lower the clock input to the EEPROM (clearing the SK bit), then + * delay + */ + *eec = *eec & ~IXGBE_EEC_SK; + IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_release_eeprom - Release EEPROM, release semaphores + * @hw: pointer to hardware structure + **/ +static void ixgbe_release_eeprom(struct ixgbe_hw *hw) +{ + u32 eec; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec |= IXGBE_EEC_CS; /* Pull CS high */ + eec &= ~IXGBE_EEC_SK; /* Lower SCK */ + + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + + udelay(1); + + /* Stop requesting EEPROM access */ + eec &= ~IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + + /* Delay before attempt to obtain semaphore again to allow FW access */ + msleep(hw->eeprom.semaphore_delay); +} + +/** + * ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum + * @hw: pointer to hardware structure + **/ +u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) +{ + u16 i; + u16 j; + u16 checksum = 0; + u16 length = 0; + u16 pointer = 0; + u16 word = 0; + + /* Include 0x0-0x3F in the checksum */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { + if (hw->eeprom.ops.read(hw, i, &word) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + + /* Include all data from pointers except for the fw pointer */ + for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { + hw->eeprom.ops.read(hw, i, &pointer); + + /* Make sure the pointer seems valid */ + if (pointer != 0xFFFF && pointer != 0) { + hw->eeprom.ops.read(hw, pointer, &length); + + if (length != 0xFFFF && length != 0) { + for (j = pointer+1; j <= pointer+length; j++) { + hw->eeprom.ops.read(hw, j, &word); + checksum += word; + } + } + } + } + + checksum = (u16)IXGBE_EEPROM_SUM - checksum; + + return checksum; +} + +/** + * ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + **/ +s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, + u16 *checksum_val) +{ + s32 status; + u16 checksum; + u16 read_checksum = 0; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status == 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum); + + /* + * Verify read checksum from EEPROM is the same as + * calculated checksum + */ + if (read_checksum != checksum) + status = IXGBE_ERR_EEPROM_CHECKSUM; + + /* If the user cares, return the calculated checksum */ + if (checksum_val) + *checksum_val = checksum; + } else { + hw_dbg(hw, "EEPROM read failed\n"); + } + + return status; +} + +/** + * ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum + * @hw: pointer to hardware structure + **/ +s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw) +{ + s32 status; + u16 checksum; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status == 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM, + checksum); + } else { + hw_dbg(hw, "EEPROM read failed\n"); + } + + return status; +} + +/** + * ixgbe_validate_mac_addr - Validate MAC address + * @mac_addr: pointer to MAC address. + * + * Tests a MAC address to ensure it is a valid Individual Address + **/ +s32 ixgbe_validate_mac_addr(u8 *mac_addr) +{ + s32 status = 0; + + /* Make sure it is not a multicast address */ + if (IXGBE_IS_MULTICAST(mac_addr)) { + hw_dbg(hw, "MAC address is multicast\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + /* Not a broadcast address */ + } else if (IXGBE_IS_BROADCAST(mac_addr)) { + hw_dbg(hw, "MAC address is broadcast\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + /* Reject the zero address */ + } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 && + mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) { + hw_dbg(hw, "MAC address is all zeros\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + } + return status; +} + +/** + * ixgbe_set_rar_generic - Set Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * @addr: Address to put into receive address register + * @vmdq: VMDq "set" or "pool" index + * @enable_addr: set flag that address is active + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr) +{ + u32 rar_low, rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + /* setup VMDq pool selection before this RAR gets enabled */ + hw->mac.ops.set_vmdq(hw, index, vmdq); + + /* + * HW expects these in little endian so we reverse the byte + * order from network order (big endian) to little endian + */ + rar_low = ((u32)addr[0] | + ((u32)addr[1] << 8) | + ((u32)addr[2] << 16) | + ((u32)addr[3] << 24)); + /* + * Some parts put the VMDq setting in the extra RAH bits, + * so save everything except the lower 16 bits that hold part + * of the address and the address valid bit. + */ + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); + rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); + rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8)); + + if (enable_addr != 0) + rar_high |= IXGBE_RAH_AV; + + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low); + IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + + return 0; +} + +/** + * ixgbe_clear_rar_generic - Remove Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * + * Clears an ethernet address from a receive address register. + **/ +s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + /* + * Some parts put the VMDq setting in the extra RAH bits, + * so save everything except the lower 16 bits that hold part + * of the address and the address valid bit. + */ + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); + rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); + + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + + /* clear VMDq pool/queue selection for this RAR */ + hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL); + + return 0; +} + +/** + * ixgbe_init_rx_addrs_generic - Initializes receive address filters. + * @hw: pointer to hardware structure + * + * Places the MAC address in receive address register 0 and clears the rest + * of the receive address registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + **/ +s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw) +{ + u32 i; + u32 rar_entries = hw->mac.num_rar_entries; + + /* + * If the current mac address is valid, assume it is a software override + * to the permanent address. + * Otherwise, use the permanent address from the eeprom. + */ + if (ixgbe_validate_mac_addr(hw->mac.addr) == + IXGBE_ERR_INVALID_MAC_ADDR) { + /* Get the MAC address from the RAR0 for later reference */ + hw->mac.ops.get_mac_addr(hw, hw->mac.addr); + + hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ", + hw->mac.addr[0], hw->mac.addr[1], + hw->mac.addr[2]); + hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], + hw->mac.addr[4], hw->mac.addr[5]); + } else { + /* Setup the receive address. */ + hw_dbg(hw, "Overriding MAC Address in RAR[0]\n"); + hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ", + hw->mac.addr[0], hw->mac.addr[1], + hw->mac.addr[2]); + hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], + hw->mac.addr[4], hw->mac.addr[5]); + + hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + /* clear VMDq pool/queue selection for RAR 0 */ + hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); + } + hw->addr_ctrl.overflow_promisc = 0; + + hw->addr_ctrl.rar_used_count = 1; + + /* Zero out the other receive addresses. */ + hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1); + for (i = 1; i < rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0); + } + + /* Clear the MTA */ + hw->addr_ctrl.mta_in_use = 0; + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); + + hw_dbg(hw, " Clearing MTA\n"); + for (i = 0; i < hw->mac.mcft_size; i++) + IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0); + + ixgbe_init_uta_tables(hw); + + return 0; +} + +/** + * ixgbe_add_uc_addr - Adds a secondary unicast address. + * @hw: pointer to hardware structure + * @addr: new address + * + * Adds it to unused receive address register or goes into promiscuous mode. + **/ +void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + u32 rar_entries = hw->mac.num_rar_entries; + u32 rar; + + hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + /* + * Place this address in the RAR if there is room, + * else put the controller into promiscuous mode + */ + if (hw->addr_ctrl.rar_used_count < rar_entries) { + rar = hw->addr_ctrl.rar_used_count; + hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar); + hw->addr_ctrl.rar_used_count++; + } else { + hw->addr_ctrl.overflow_promisc++; + } + + hw_dbg(hw, "ixgbe_add_uc_addr Complete\n"); +} + +/** + * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses + * @hw: pointer to hardware structure + * @addr_list: the list of new addresses + * @addr_count: number of addresses + * @next: iterator function to walk the address list + * + * The given list replaces any existing list. Clears the secondary addrs from + * receive address registers. Uses unused receive address registers for the + * first secondary addresses, and falls back to promiscuous mode as needed. + * + * Drivers using secondary unicast addresses must set user_set_promisc when + * manually putting the device into promiscuous mode. + **/ +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr next) +{ + u8 *addr; + u32 i; + u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; + u32 uc_addr_in_use; + u32 fctrl; + u32 vmdq; + + /* + * Clear accounting of old secondary address list, + * don't count RAR[0] + */ + uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1; + hw->addr_ctrl.rar_used_count -= uc_addr_in_use; + hw->addr_ctrl.overflow_promisc = 0; + + /* Zero out the other receive addresses */ + hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1); + for (i = 0; i < uc_addr_in_use; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0); + } + + /* Add the new addresses */ + for (i = 0; i < addr_count; i++) { + hw_dbg(hw, " Adding the secondary addresses:\n"); + addr = next(hw, &addr_list, &vmdq); + ixgbe_add_uc_addr(hw, addr, vmdq); + } + + if (hw->addr_ctrl.overflow_promisc) { + /* enable promisc if not already in overflow or set by user */ + if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { + hw_dbg(hw, " Entering address overflow promisc mode\n"); + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl |= IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + } + } else { + /* only disable if set by overflow, not by user */ + if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { + hw_dbg(hw, " Leaving address overflow promisc mode\n"); + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl &= ~IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + } + } + + hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n"); + return 0; +} + +/** + * ixgbe_mta_vector - Determines bit-vector in multicast table to set + * @hw: pointer to hardware structure + * @mc_addr: the multicast address + * + * Extracts the 12 bits, from a multicast address, to determine which + * bit-vector to set in the multicast table. The hardware uses 12 bits, from + * incoming rx multicast addresses, to determine the bit-vector to check in + * the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set + * by the MO field of the MCSTCTRL. The MO field is set during initialization + * to mc_filter_type. + **/ +static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr) +{ + u32 vector = 0; + + switch (hw->mac.mc_filter_type) { + case 0: /* use bits [47:36] of the address */ + vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); + break; + case 1: /* use bits [46:35] of the address */ + vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5)); + break; + case 2: /* use bits [45:34] of the address */ + vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6)); + break; + case 3: /* use bits [43:32] of the address */ + vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); + break; + default: /* Invalid mc_filter_type */ + hw_dbg(hw, "MC filter type param set incorrectly\n"); + break; + } + + /* vector can only be 12-bits or boundary will be exceeded */ + vector &= 0xFFF; + return vector; +} + +/** + * ixgbe_set_mta - Set bit-vector in multicast table + * @hw: pointer to hardware structure + * @hash_value: Multicast address hash value + * + * Sets the bit-vector in the multicast table. + **/ +void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr) +{ + u32 vector; + u32 vector_bit; + u32 vector_reg; + + hw->addr_ctrl.mta_in_use++; + + vector = ixgbe_mta_vector(hw, mc_addr); + hw_dbg(hw, " bit-vector = 0x%03X\n", vector); + + /* + * The MTA is a register array of 128 32-bit registers. It is treated + * like an array of 4096 bits. We want to set bit + * BitArray[vector_value]. So we figure out what register the bit is + * in, read it, OR in the new bit, then write back the new value. The + * register is determined by the upper 7 bits of the vector value and + * the bit within that register are determined by the lower 5 bits of + * the value. + */ + vector_reg = (vector >> 5) & 0x7F; + vector_bit = vector & 0x1F; + hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit); +} + +/** + * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses + * @hw: pointer to hardware structure + * @mc_addr_list: the list of new multicast addresses + * @mc_addr_count: number of addresses + * @next: iterator function to walk the multicast address list + * @clear: flag, when set clears the table beforehand + * + * When the clear flag is set, the given list replaces any existing list. + * Hashes the given addresses into the multicast table. + **/ +s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr next, + bool clear) +{ + u32 i; + u32 vmdq; + + /* + * Set the new number of MC addresses that we are being requested to + * use. + */ + hw->addr_ctrl.num_mc_addrs = mc_addr_count; + hw->addr_ctrl.mta_in_use = 0; + + /* Clear mta_shadow */ + if (clear) { + hw_dbg(hw, " Clearing MTA\n"); + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + } + + /* Update mta_shadow */ + for (i = 0; i < mc_addr_count; i++) { + hw_dbg(hw, " Adding the multicast addresses:\n"); + ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq)); + } + + /* Enable mta */ + for (i = 0; i < hw->mac.mcft_size; i++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i, + hw->mac.mta_shadow[i]); + + if (hw->addr_ctrl.mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, + IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type); + + hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n"); + return 0; +} + +/** + * ixgbe_enable_mc_generic - Enable multicast address in RAR + * @hw: pointer to hardware structure + * + * Enables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; + + if (a->mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE | + hw->mac.mc_filter_type); + + return 0; +} + +/** + * ixgbe_disable_mc_generic - Disable multicast address in RAR + * @hw: pointer to hardware structure + * + * Disables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; + + if (a->mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); + + return 0; +} + +/** + * ixgbe_fc_enable_generic - Enable flow control + * @hw: pointer to hardware structure + * + * Enable flow control according to the current settings. + **/ +s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 mflcn_reg, fccfg_reg; + u32 reg; + u32 fcrtl, fcrth; + int i; + + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + + /* Negotiate the fc mode to use */ + ixgbe_fc_autoneg(hw); + + /* Disable any previous flow control settings */ + mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN); + mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE); + + fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG); + fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY); + + /* + * The possible values of fc.current_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.current_mode) { + case ixgbe_fc_none: + /* + * Flow control is disabled by software override or autoneg. + * The code below will actually disable it in the HW. + */ + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + mflcn_reg |= IXGBE_MFLCN_RFCE; + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; + break; + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + mflcn_reg |= IXGBE_MFLCN_RFCE; + fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + /* Set 802.3x based flow control settings. */ + mflcn_reg |= IXGBE_MFLCN_DPF; + IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); + IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); + + + /* Set up and enable Rx high/low water mark thresholds, enable XON. */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; + IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl); + fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; + } else { + IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0); + /* + * In order to prevent Tx hangs when the internal Tx + * switch is enabled we must set the high water mark + * to the maximum FCRTH value. This allows the Tx + * switch to function even under heavy Rx workloads. + */ + fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32; + } + + IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth); + } + + /* Configure pause time (2 TCs per register) */ + reg = hw->fc.pause_time * 0x00010001; + for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); + +out: + return ret_val; +} + +/** + * ixgbe_negotiate_fc - Negotiate flow control + * @hw: pointer to hardware structure + * @adv_reg: flow control advertised settings + * @lp_reg: link partner's flow control settings + * @adv_sym: symmetric pause bit in advertisement + * @adv_asm: asymmetric pause bit in advertisement + * @lp_sym: symmetric pause bit in link partner advertisement + * @lp_asm: asymmetric pause bit in link partner advertisement + * + * Find the intersection between advertised settings and link partner's + * advertised settings + **/ +static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) +{ + if ((!(adv_reg)) || (!(lp_reg))) + return IXGBE_ERR_FC_NOT_NEGOTIATED; + + if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { + /* + * Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == ixgbe_fc_full) { + hw->fc.current_mode = ixgbe_fc_full; + hw_dbg(hw, "Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = ixgbe_fc_rx_pause; + hw_dbg(hw, "Flow Control=RX PAUSE frames only\n"); + } + } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) && + (lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = ixgbe_fc_tx_pause; + hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n"); + } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) && + !(lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = ixgbe_fc_rx_pause; + hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n"); + } else { + hw->fc.current_mode = ixgbe_fc_none; + hw_dbg(hw, "Flow Control = NONE.\n"); + } + return 0; +} + +/** + * ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber + * @hw: pointer to hardware structure + * + * Enable flow control according on 1 gig fiber. + **/ +static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) +{ + u32 pcs_anadv_reg, pcs_lpab_reg, linkstat; + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + + /* + * On multispeed fiber at 1g, bail out if + * - link is up but AN did not complete, or if + * - link is up and AN completed but timed out + */ + + linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); + if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || + (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) + goto out; + + pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); + + ret_val = ixgbe_negotiate_fc(hw, pcs_anadv_reg, + pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE, + IXGBE_PCS1GANA_ASM_PAUSE, + IXGBE_PCS1GANA_SYM_PAUSE, + IXGBE_PCS1GANA_ASM_PAUSE); + +out: + return ret_val; +} + +/** + * ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) +{ + u32 links2, anlp1_reg, autoc_reg, links; + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + + /* + * On backplane, bail out if + * - backplane autoneg was not completed, or if + * - we are 82599 and link partner is not AN enabled + */ + links = IXGBE_READ_REG(hw, IXGBE_LINKS); + if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) + goto out; + + if (hw->mac.type == ixgbe_mac_82599EB) { + links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); + if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) + goto out; + } + /* + * Read the 10g AN autoc and LP ability registers and resolve + * local flow control settings accordingly + */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1); + + ret_val = ixgbe_negotiate_fc(hw, autoc_reg, + anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE, + IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE); + +out: + return ret_val; +} + +/** + * ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) +{ + u16 technology_ability_reg = 0; + u16 lp_technology_ability_reg = 0; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &technology_ability_reg); + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &lp_technology_ability_reg); + + return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg, + (u32)lp_technology_ability_reg, + IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE, + IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE); +} + +/** + * ixgbe_fc_autoneg - Configure flow control + * @hw: pointer to hardware structure + * + * Compares our advertised flow control capabilities to those advertised by + * our link partner, and determines the proper flow control mode to use. + **/ +void ixgbe_fc_autoneg(struct ixgbe_hw *hw) +{ + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + ixgbe_link_speed speed; + bool link_up; + + /* + * AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. + */ + if (hw->fc.disable_fc_autoneg) + goto out; + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) + goto out; + + switch (hw->phy.media_type) { + /* Autoneg flow control on fiber adapters */ + case ixgbe_media_type_fiber: + if (speed == IXGBE_LINK_SPEED_1GB_FULL) + ret_val = ixgbe_fc_autoneg_fiber(hw); + break; + + /* Autoneg flow control on backplane adapters */ + case ixgbe_media_type_backplane: + ret_val = ixgbe_fc_autoneg_backplane(hw); + break; + + /* Autoneg flow control on copper adapters */ + case ixgbe_media_type_copper: + if (ixgbe_device_supports_autoneg_fc(hw) == 0) + ret_val = ixgbe_fc_autoneg_copper(hw); + break; + + default: + break; + } + +out: + if (ret_val == 0) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_disable_pcie_master - Disable PCI-express master access + * @hw: pointer to hardware structure + * + * Disables PCI-Express master access and verifies there are no pending + * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable + * bit hasn't caused the master requests to be disabled, else 0 + * is returned signifying master requests disabled. + **/ +s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 i; + + /* Always set this bit to ensure any future transactions are blocked */ + IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + + /* Exit if master requets are blocked */ + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; + + /* Poll for master request bit to clear */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + udelay(100); + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; + } + + /* + * Two consecutive resets are required via CTRL.RST per datasheet + * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine + * of this need. The first reset prevents new master requests from + * being issued by our device. We then must wait 1usec or more for any + * remaining completions from the PCIe bus to trickle in, and then reset + * again to clear out any effects they may have had on our device. + */ + hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); + hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + + /* + * Before proceeding, make sure that the PCIe block does not have + * transactions pending. + */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + udelay(100); + if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) & + IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING)) + goto out; + } + + hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); + status = IXGBE_ERR_MASTER_REQUESTS_PENDING; + +out: + return status; +} + +/** + * ixgbe_acquire_swfw_sync - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore through the GSSR register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask) +{ + u32 gssr; + u32 swmask = mask; + u32 fwmask = mask << 5; + s32 timeout = 200; + + while (timeout) { + /* + * SW EEPROM semaphore bit is used for access to all + * SW_FW_SYNC/GSSR bits (not just EEPROM) + */ + if (ixgbe_get_eeprom_semaphore(hw)) + return IXGBE_ERR_SWFW_SYNC; + + gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); + if (!(gssr & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) or other software + * thread currently using resource (swmask) + */ + ixgbe_release_eeprom_semaphore(hw); + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n"); + return IXGBE_ERR_SWFW_SYNC; + } + + gssr |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); + + ixgbe_release_eeprom_semaphore(hw); + return 0; +} + +/** + * ixgbe_release_swfw_sync - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through the GSSR register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask) +{ + u32 gssr; + u32 swmask = mask; + + ixgbe_get_eeprom_semaphore(hw); + + gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); + gssr &= ~swmask; + IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); + + ixgbe_release_eeprom_semaphore(hw); +} + +/** + * ixgbe_disable_sec_rx_path_generic - Stops the receive data path + * @hw: pointer to hardware structure + * + * Stops the receive data path and waits for the HW to internally empty + * the Rx security block + **/ +s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw) +{ +#define IXGBE_MAX_SECRX_POLL 40 + + int i; + int secrxreg; + + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + secrxreg |= IXGBE_SECRXCTRL_RX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); + for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) { + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT); + if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY) + break; + else + /* Use interrupt-safe sleep just in case */ + udelay(1000); + } + + /* For informational purposes only */ + if (i >= IXGBE_MAX_SECRX_POLL) + hw_dbg(hw, "Rx unit being enabled before security " + "path fully disabled. Continuing with init.\n"); + + return 0; +} + +/** + * ixgbe_enable_sec_rx_path_generic - Enables the receive data path + * @hw: pointer to hardware structure + * + * Enables the receive data path. + **/ +s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw) +{ + int secrxreg; + + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit + * @hw: pointer to hardware structure + * @regval: register value to write to RXCTRL + * + * Enables the Rx DMA unit + **/ +s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval) +{ + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); + + return 0; +} + +/** + * ixgbe_blink_led_start_generic - Blink LED based on index. + * @hw: pointer to hardware structure + * @index: led number to blink + **/ +s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) +{ + ixgbe_link_speed speed = 0; + bool link_up = 0; + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* + * Link must be up to auto-blink the LEDs; + * Force it if link is down. + */ + hw->mac.ops.check_link(hw, &speed, &link_up, false); + + if (!link_up) { + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + autoc_reg |= IXGBE_AUTOC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(10); + } + + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_blink_led_stop_generic - Stop blinking LED based on index. + * @hw: pointer to hardware structure + * @index: led number to stop blinking + **/ +s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + autoc_reg &= ~IXGBE_AUTOC_FLU; + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg &= ~IXGBE_LED_BLINK(index); + led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM + * @hw: pointer to hardware structure + * @san_mac_offset: SAN MAC address offset + * + * This function will read the EEPROM location for the SAN MAC address + * pointer, and returns the value at that location. This is used in both + * get and set mac_addr routines. + **/ +static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, + u16 *san_mac_offset) +{ + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. + */ + hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset); + + return 0; +} + +/** + * ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Reads the SAN MAC address from the EEPROM, if it's available. This is + * per-port, so set_lan_id() must be called before reading the addresses. + * set_lan_id() is called by identify_sfp(), but this cannot be relied + * upon for non-SFP connections, so we must call it here. + **/ +s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + u16 san_mac_data, san_mac_offset; + u8 i; + + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. If they're not, no point in calling set_lan_id() here. + */ + ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + + if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { + /* + * No addresses available in this EEPROM. It's not an + * error though, so just wipe the local address and return. + */ + for (i = 0; i < 6; i++) + san_mac_addr[i] = 0xFF; + + goto san_mac_addr_out; + } + + /* make sure we know which port we need to program */ + hw->mac.ops.set_lan_id(hw); + /* apply the port offset to the address offset */ + (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : + (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); + for (i = 0; i < 3; i++) { + hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data); + san_mac_addr[i * 2] = (u8)(san_mac_data); + san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8); + san_mac_offset++; + } + +san_mac_addr_out: + return 0; +} + +/** + * ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Write a SAN MAC address to the EEPROM. + **/ +s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + s32 status = 0; + u16 san_mac_data, san_mac_offset; + u8 i; + + /* Look for SAN mac address pointer. If not defined, return */ + ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + + if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { + status = IXGBE_ERR_NO_SAN_ADDR_PTR; + goto san_mac_addr_out; + } + + /* Make sure we know which port we need to write */ + hw->mac.ops.set_lan_id(hw); + /* Apply the port offset to the address offset */ + (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : + (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); + + for (i = 0; i < 3; i++) { + san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8); + san_mac_data |= (u16)(san_mac_addr[i * 2]); + hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data); + san_mac_offset++; + } + +san_mac_addr_out: + return status; +} + +/** + * ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count + * @hw: pointer to hardware structure + * + * Read PCIe configuration space, and get the MSI-X vector count from + * the capabilities table. + **/ +u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) +{ + u16 msix_count = 1; + u16 max_msix_count; + u16 pcie_offset; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598; + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; + break; + default: + return msix_count; + } + + msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset); + msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK; + + /* MSI-X count is zero-based in HW */ + msix_count++; + + if (msix_count > max_msix_count) + msix_count = max_msix_count; + + return msix_count; +} + +/** + * ixgbe_insert_mac_addr_generic - Find a RAR for this mac address + * @hw: pointer to hardware structure + * @addr: Address to put into receive address register + * @vmdq: VMDq pool to assign + * + * Puts an ethernet address into a receive address register, or + * finds the rar that it is aleady in; adds to the pool list + **/ +s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF; + u32 first_empty_rar = NO_EMPTY_RAR_FOUND; + u32 rar; + u32 rar_low, rar_high; + u32 addr_low, addr_high; + + /* swap bytes for HW little endian */ + addr_low = addr[0] | (addr[1] << 8) + | (addr[2] << 16) + | (addr[3] << 24); + addr_high = addr[4] | (addr[5] << 8); + + /* + * Either find the mac_id in rar or find the first empty space. + * rar_highwater points to just after the highest currently used + * rar in order to shorten the search. It grows when we add a new + * rar to the top. + */ + for (rar = 0; rar < hw->mac.rar_highwater; rar++) { + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + + if (((IXGBE_RAH_AV & rar_high) == 0) + && first_empty_rar == NO_EMPTY_RAR_FOUND) { + first_empty_rar = rar; + } else if ((rar_high & 0xFFFF) == addr_high) { + rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar)); + if (rar_low == addr_low) + break; /* found it already in the rars */ + } + } + + if (rar < hw->mac.rar_highwater) { + /* already there so just add to the pool bits */ + ixgbe_set_vmdq(hw, rar, vmdq); + } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) { + /* stick it into first empty RAR slot we found */ + rar = first_empty_rar; + ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + } else if (rar == hw->mac.rar_highwater) { + /* add it to the top of the list and inc the highwater mark */ + ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + hw->mac.rar_highwater++; + } else if (rar >= hw->mac.num_rar_entries) { + return IXGBE_ERR_INVALID_MAC_ADDR; + } + + /* + * If we found rar[0], make sure the default pool bit (we use pool 0) + * remains cleared to be sure default pool packets will get delivered + */ + if (rar == 0) + ixgbe_clear_vmdq(hw, rar, 0); + + return rar; +} + +/** + * ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to disassociate + * @vmdq: VMDq pool index to remove from the rar + **/ +s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 mpsar_lo, mpsar_hi; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + + if (!mpsar_lo && !mpsar_hi) + goto done; + + if (vmdq == IXGBE_CLEAR_VMDQ_ALL) { + if (mpsar_lo) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0); + mpsar_lo = 0; + } + if (mpsar_hi) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0); + mpsar_hi = 0; + } + } else if (vmdq < 32) { + mpsar_lo &= ~(1 << vmdq); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo); + } else { + mpsar_hi &= ~(1 << (vmdq - 32)); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi); + } + + /* was that the last pool using this rar? */ + if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + hw->mac.ops.clear_rar(hw, rar); +done: + return 0; +} + +/** + * ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq pool index + **/ +s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 mpsar; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + if (vmdq < 32) { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar |= 1 << vmdq; + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); + } else { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + mpsar |= 1 << (vmdq - 32); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); + } + return 0; +} + +/** + * This function should only be involved in the IOV mode. + * In IOV mode, Default pool is next pool after the number of + * VFs advertized and not 0. + * MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index] + * + * ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address + * @hw: pointer to hardware struct + * @vmdq: VMDq pool index + **/ +s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq) +{ + u32 mpsar; + u32 rar = hw->mac.san_mac_rar_index; + + if (vmdq < 32) { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar |= 1 << vmdq; + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); + } else { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + mpsar |= 1 << (vmdq - 32); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); + } + + return 0; +} + +/** + * ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array + * @hw: pointer to hardware structure + **/ +s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw) +{ + int i; + + hw_dbg(hw, " Clearing UTA\n"); + + for (i = 0; i < 128; i++) + IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0); + + return 0; +} + +/** + * ixgbe_find_vlvf_slot - find the vlanid or the first empty slot + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * + * return the VLVF index where this VLAN id should be placed + * + **/ +s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) +{ + u32 bits = 0; + u32 first_empty_slot = 0; + s32 regindex; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* + * Search for the vlan id in the VLVF entries. Save off the first empty + * slot found along the way + */ + for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) { + bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex)); + if (!bits && !(first_empty_slot)) + first_empty_slot = regindex; + else if ((bits & 0x0FFF) == vlan) + break; + } + + /* + * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan + * in the VLVF. Else use the first empty VLVF register for this + * vlan id. + */ + if (regindex >= IXGBE_VLVF_ENTRIES) { + if (first_empty_slot) + regindex = first_empty_slot; + else { + hw_dbg(hw, "No space in VLVF.\n"); + regindex = IXGBE_ERR_NO_SPACE; + } + } + + return regindex; +} + +/** + * ixgbe_set_vfta_generic - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on) +{ + s32 regindex; + u32 bitindex; + u32 vfta; + u32 targetbit; + s32 ret_val = 0; + bool vfta_changed = false; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* + * this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regindex = (vlan >> 5) & 0x7F; + bitindex = vlan & 0x1F; + targetbit = (1 << bitindex); + vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); + + if (vlan_on) { + if (!(vfta & targetbit)) { + vfta |= targetbit; + vfta_changed = true; + } + } else { + if ((vfta & targetbit)) { + vfta &= ~targetbit; + vfta_changed = true; + } + } + + /* Part 2 + * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF + */ + ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on, + &vfta_changed); + if (ret_val != 0) + return ret_val; + + if (vfta_changed) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta); + + return 0; +} + +/** + * ixgbe_set_vlvf_generic - Set VLAN Pool Filter + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed) +{ + u32 vt; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* If VT Mode is set + * Either vlan_on + * make sure the vlan is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + if (vt & IXGBE_VT_CTL_VT_ENABLE) { + s32 vlvf_index; + u32 bits; + + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan); + if (vlvf_index < 0) + return vlvf_index; + + if (vlan_on) { + /* set the pool bit */ + if (vind < 32) { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + bits |= (1 << vind); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB(vlvf_index * 2), + bits); + } else { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + bits |= (1 << (vind - 32)); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1), + bits); + } + } else { + /* clear the pool bit */ + if (vind < 32) { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + bits &= ~(1 << vind); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB(vlvf_index * 2), + bits); + bits |= IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + } else { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + bits &= ~(1 << (vind - 32)); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1), + bits); + bits |= IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + } + } + + /* + * If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + if (bits) { + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), + (IXGBE_VLVF_VIEN | vlan)); + if ((!vlan_on) && (vfta_changed != NULL)) { + /* someone wants to clear the vfta entry + * but some pools/VFs are still using it. + * Ignore it. */ + *vfta_changed = false; + } + } else + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); + } + + return 0; +} + +/** + * ixgbe_clear_vfta_generic - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) +{ + u32 offset; + + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); + + for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) { + IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0); + } + + return 0; +} + +/** + * ixgbe_check_mac_link_generic - Determine link and speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true when link is up + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Reads the links register to determine if link is up and the current speed + **/ +s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + u32 links_reg, links_orig; + u32 i; + + /* clear the old state */ + links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); + + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + + if (links_orig != links_reg) { + hw_dbg(hw, "LINKS changed from %08X to %08X\n", + links_orig, links_reg); + } + + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if (links_reg & IXGBE_LINKS_UP) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + } + } else { + if (links_reg & IXGBE_LINKS_UP) + *link_up = true; + else + *link_up = false; + } + + if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_10G_82599) + *speed = IXGBE_LINK_SPEED_10GB_FULL; + else if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_1G_82599) + *speed = IXGBE_LINK_SPEED_1GB_FULL; + else if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_100_82599) + *speed = IXGBE_LINK_SPEED_100_FULL; + else + *speed = IXGBE_LINK_SPEED_UNKNOWN; + + return 0; +} + +/** + * ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from + * the EEPROM + * @hw: pointer to hardware structure + * @wwnn_prefix: the alternative WWNN prefix + * @wwpn_prefix: the alternative WWPN prefix + * + * This function will read the EEPROM from the alternative SAN MAC address + * block to check the support for the alternative WWNN/WWPN prefix support. + **/ +s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix) +{ + u16 offset, caps; + u16 alt_san_mac_blk_offset; + + /* clear output first */ + *wwnn_prefix = 0xFFFF; + *wwpn_prefix = 0xFFFF; + + /* check if alternative SAN MAC is supported */ + hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR, + &alt_san_mac_blk_offset); + + if ((alt_san_mac_blk_offset == 0) || + (alt_san_mac_blk_offset == 0xFFFF)) + goto wwn_prefix_out; + + /* check capability in alternative san mac address block */ + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET; + hw->eeprom.ops.read(hw, offset, &caps); + if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN)) + goto wwn_prefix_out; + + /* get the corresponding prefix for WWNN/WWPN */ + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET; + hw->eeprom.ops.read(hw, offset, wwnn_prefix); + + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET; + hw->eeprom.ops.read(hw, offset, wwpn_prefix); + +wwn_prefix_out: + return 0; +} + +/** + * ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM + * @hw: pointer to hardware structure + * @bs: the fcoe boot status + * + * This function will read the FCOE boot status from the iSCSI FCOE block + **/ +s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs) +{ + u16 offset, caps, flags; + s32 status; + + /* clear output first */ + *bs = ixgbe_fcoe_bootstatus_unavailable; + + /* check if FCOE IBA block is present */ + offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR; + status = hw->eeprom.ops.read(hw, offset, &caps); + if (status != 0) + goto out; + + if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE)) + goto out; + + /* check if iSCSI FCOE block is populated */ + status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset); + if (status != 0) + goto out; + + if ((offset == 0) || (offset == 0xFFFF)) + goto out; + + /* read fcoe flags in iSCSI FCOE block */ + offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET; + status = hw->eeprom.ops.read(hw, offset, &flags); + if (status != 0) + goto out; + + if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE) + *bs = ixgbe_fcoe_bootstatus_enabled; + else + *bs = ixgbe_fcoe_bootstatus_disabled; + +out: + return status; +} + +/** + * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing + * @hw: pointer to hardware structure + * @enable: enable or disable switch for anti-spoofing + * @pf: Physical Function pool - do not enable anti-spoofing for the PF + * + **/ +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf) +{ + int j; + int pf_target_reg = pf >> 3; + int pf_target_shift = pf % 8; + u32 pfvfspoof = 0; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + if (enable) + pfvfspoof = IXGBE_SPOOF_MACAS_MASK; + + /* + * PFVFSPOOF register array is size 8 with 8 bits assigned to + * MAC anti-spoof enables in each register array element. + */ + for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++) + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof); + + /* If not enabling anti-spoofing then done */ + if (!enable) + return; + + /* + * The PF should be allowed to spoof so that it can support + * emulation mode NICs. Reset the bit assigned to the PF + */ + pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg)); + pfvfspoof ^= (1 << pf_target_shift); + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof); +} + +/** + * ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing + * @hw: pointer to hardware structure + * @enable: enable or disable switch for VLAN anti-spoofing + * @pf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing + * + **/ +void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf) +{ + int vf_target_reg = vf >> 3; + int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT; + u32 pfvfspoof; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); + if (enable) + pfvfspoof |= (1 << vf_target_shift); + else + pfvfspoof &= ~(1 << vf_target_shift); + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); +} + +/** + * ixgbe_get_device_caps_generic - Get additional device capabilities + * @hw: pointer to hardware structure + * @device_caps: the EEPROM word with the extra device capabilities + * + * This function will read the EEPROM location for the device capabilities, + * and return the word through device_caps. + **/ +s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps) +{ + hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps); + + return 0; +} + +/** + * ixgbe_calculate_checksum - Calculate checksum for buffer + * @buffer: pointer to EEPROM + * @length: size of EEPROM to calculate a checksum for + * Calculates the checksum for some buffer on a specified length. The + * checksum calculated is returned. + **/ +static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) +{ + u32 i; + u8 sum = 0; + + if (!buffer) + return 0; + for (i = 0; i < length; i++) + sum += buffer[i]; + + return (u8) (0 - sum); +} + +/** + * ixgbe_host_interface_command - Issue command to manageability block + * @hw: pointer to the HW structure + * @buffer: contains the command to write and where the return status will + * be placed + * @length: length of buffer, must be multiple of 4 bytes + * + * Communicates with the manageability block. On success return 0 + * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. + **/ +static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, + u32 length) +{ + u32 hicr, i, bi; + u32 hdr_size = sizeof(struct ixgbe_hic_hdr); + u8 buf_len, dword_len; + + s32 ret_val = 0; + + if (length == 0 || length & 0x3 || + length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + hw_dbg(hw, "Buffer length failure.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Check that the host interface is enabled. */ + hicr = IXGBE_READ_REG(hw, IXGBE_HICR); + if ((hicr & IXGBE_HICR_EN) == 0) { + hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs */ + dword_len = length >> 2; + + /* + * The device driver writes the relevant command block + * into the ram area. + */ + for (i = 0; i < dword_len; i++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG, + i, IXGBE_CPU_TO_LE32(buffer[i])); + + /* Setting this bit tells the ARC that a new command is pending. */ + IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C); + + for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) { + hicr = IXGBE_READ_REG(hw, IXGBE_HICR); + if (!(hicr & IXGBE_HICR_C)) + break; + msleep(1); + } + + /* Check command successful completion. */ + if (i == IXGBE_HI_COMMAND_TIMEOUT || + (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) { + hw_dbg(hw, "Command has failed with no status valid.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs */ + dword_len = hdr_size >> 2; + + /* first pull in the header so we know the buffer length */ + for (bi = 0; bi < dword_len; bi++) { + buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + IXGBE_LE32_TO_CPUS(&buffer[bi]); + } + + /* If there is any thing in data position pull it in */ + buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len; + if (buf_len == 0) + goto out; + + if (length < (buf_len + hdr_size)) { + hw_dbg(hw, "Buffer not large enough for reply message.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs, add 3 for odd lengths */ + dword_len = (buf_len + 3) >> 2; + + /* Pull in the rest of the buffer (bi is where we left off)*/ + for (; bi <= dword_len; bi++) { + buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + IXGBE_LE32_TO_CPUS(&buffer[bi]); + } + +out: + return ret_val; +} + +/** + * ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware + * @hw: pointer to the HW structure + * @maj: driver version major number + * @min: driver version minor number + * @build: driver version build number + * @sub: driver version sub build number + * + * Sends driver version number to firmware through the manageability + * block. On success return 0 + * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub) +{ + struct ixgbe_hic_drv_info fw_cmd; + int i; + s32 ret_val = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM) + != 0) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; + fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; + fw_cmd.port_num = (u8)hw->bus.func; + fw_cmd.ver_maj = maj; + fw_cmd.ver_min = min; + fw_cmd.ver_build = build; + fw_cmd.ver_sub = sub; + fw_cmd.hdr.checksum = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); + fw_cmd.pad = 0; + fw_cmd.pad2 = 0; + + for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { + ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd, + sizeof(fw_cmd)); + if (ret_val != 0) + continue; + + if (fw_cmd.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) + ret_val = 0; + else + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + + break; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); +out: + return ret_val; +} + +/** + * ixgbe_set_rxpba_generic - Initialize Rx packet buffer + * @hw: pointer to hardware structure + * @num_pb: number of packet buffers to allocate + * @headroom: reserve n KB of headroom + * @strategy: packet buffer allocation strategy + **/ +void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, + int strategy) +{ + u32 pbsize = hw->mac.rx_pb_size; + int i = 0; + u32 rxpktsize, txpktsize, txpbthresh; + + /* Reserve headroom */ + pbsize -= headroom; + + if (!num_pb) + num_pb = 1; + + /* Divide remaining packet buffer space amongst the number of packet + * buffers requested using supplied strategy. + */ + switch (strategy) { + case PBA_STRATEGY_WEIGHTED: + /* ixgbe_dcb_pba_80_48 strategy weight first half of packet + * buffer with 5/8 of the packet buffer space. + */ + rxpktsize = (pbsize * 5) / (num_pb * 4); + pbsize -= rxpktsize * (num_pb / 2); + rxpktsize <<= IXGBE_RXPBSIZE_SHIFT; + for (; i < (num_pb / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + /* Fall through to configure remaining packet buffers */ + case PBA_STRATEGY_EQUAL: + rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT; + for (; i < num_pb; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + break; + default: + break; + } + + /* Only support an equally distributed Tx packet buffer strategy. */ + txpktsize = IXGBE_TXPBSIZE_MAX / num_pb; + txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX; + for (i = 0; i < num_pb; i++) { + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize); + IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh); + } + + /* Clear unused TCs, if any, to zero buffer size*/ + for (; i < IXGBE_MAX_PB; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0); + } +} + +/** + * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo + * @hw: pointer to the hardware structure + * + * The 82599 and x540 MACs can experience issues if TX work is still pending + * when a reset occurs. This function prevents this by flushing the PCIe + * buffers on the system. + **/ +void ixgbe_clear_tx_pending(struct ixgbe_hw *hw) +{ + u32 gcr_ext, hlreg0; + + /* + * If double reset is not requested then all transactions should + * already be clear and as such there is no work to do + */ + if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED)) + return; + + /* + * Set loopback enable to prevent any transmits from being sent + * should the link come up. This assumes that the RXCTRL.RXEN bit + * has already been cleared. + */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK); + + /* initiate cleaning flow for buffers in the PCIe transaction layer */ + gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, + gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR); + + /* Flush all writes and allow 20usec for all transactions to clear */ + IXGBE_WRITE_FLUSH(hw); + udelay(20); + + /* restore previous register values */ + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); +} + +static const u8 ixgbe_emc_temp_data[4] = { + IXGBE_EMC_INTERNAL_DATA, + IXGBE_EMC_DIODE1_DATA, + IXGBE_EMC_DIODE2_DATA, + IXGBE_EMC_DIODE3_DATA +}; +static const u8 ixgbe_emc_therm_limit[4] = { + IXGBE_EMC_INTERNAL_THERM_LIMIT, + IXGBE_EMC_DIODE1_THERM_LIMIT, + IXGBE_EMC_DIODE2_THERM_LIMIT, + IXGBE_EMC_DIODE3_THERM_LIMIT +}; + +/** + * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * @data: pointer to the thermal sensor data structure + * + * Returns the thermal sensor data structure + **/ +s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 i; + struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + /* Only support thermal sensors attached to 82599 physical port 0 */ + if ((hw->mac.type != ixgbe_mac_82599EB) || + (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); + if (status) + goto out; + + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); + if (status) + goto out; + + if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) + != IXGBE_ETS_TYPE_EMC) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + if (num_sensors > IXGBE_MAX_SENSORS) + num_sensors = IXGBE_MAX_SENSORS; + + for (i = 0; i < num_sensors; i++) { + status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i), + &ets_sensor); + if (status) + goto out; + + sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> + IXGBE_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> + IXGBE_ETS_DATA_LOC_SHIFT); + + if (sensor_location != 0) { + status = hw->phy.ops.read_i2c_byte(hw, + ixgbe_emc_temp_data[sensor_index], + IXGBE_I2C_THERMAL_SENSOR_ADDR, + &data->sensor[i].temp); + if (status) + goto out; + } + } +out: + return status; +} + +/** + * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Inits the thermal sensor thresholds according to the NVM map + * and save off the threshold and location values into mac.thermal_sensor_data + **/ +s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 low_thresh_delta; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 therm_limit; + u8 i; + struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data)); + + /* Only support thermal sensors attached to 82599 physical port 0 */ + if ((hw->mac.type != ixgbe_mac_82599EB) || + (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) + return IXGBE_NOT_IMPLEMENTED; + + hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return IXGBE_NOT_IMPLEMENTED; + + hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); + if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) + != IXGBE_ETS_TYPE_EMC) + return IXGBE_NOT_IMPLEMENTED; + + low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> + IXGBE_ETS_LTHRES_DELTA_SHIFT); + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + + for (i = 0; i < num_sensors; i++) { + hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor); + sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> + IXGBE_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> + IXGBE_ETS_DATA_LOC_SHIFT); + therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, + ixgbe_emc_therm_limit[sensor_index], + IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit); + + if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) { + data->sensor[i].location = sensor_location; + data->sensor[i].caution_thresh = therm_limit; + data->sensor[i].max_op_thresh = therm_limit - + low_thresh_delta; + } + } + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h new file mode 100644 index 00000000..9bd6f534 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h @@ -0,0 +1,140 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_COMMON_H_ +#define _IXGBE_COMMON_H_ + +#include "ixgbe_type.h" + +u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw); + +s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw); +s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw); +s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw); +s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size); +s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr); +s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw); +void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw); +s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); + +s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); +s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 *data); +s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw); +s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, + u16 *checksum_val); +s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw); +s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg); + +s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr); +s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw); +s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, + ixgbe_mc_addr_itr func, bool clear); +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func); +s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); +s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); +s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw); +s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw); + +s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg(struct ixgbe_hw *hw); + +s32 ixgbe_validate_mac_addr(u8 *mac_addr); +s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask); +s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw); + +s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); + +s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq); +s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); +s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); +s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, + u32 vind, bool vlan_on); +s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed); +s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw); +s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan); + +s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); + +s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix); + +s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs); +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf); +void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf); +s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps); +void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, + int strategy); +s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 ver); +void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); + +#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define IXGBE_EMC_INTERNAL_DATA 0x00 +#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 +#define IXGBE_EMC_DIODE1_DATA 0x01 +#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 +#define IXGBE_EMC_DIODE2_DATA 0x23 +#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A +#define IXGBE_EMC_DIODE3_DATA 0x2A +#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30 + +s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw); +s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw); +#endif /* IXGBE_COMMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h new file mode 100644 index 00000000..a6690451 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h @@ -0,0 +1,168 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_DCB_H_ +#define _IXGBE_DCB_H_ + + +#include "ixgbe_type.h" + +/* DCB defines */ +/* DCB credit calculation defines */ +#define IXGBE_DCB_CREDIT_QUANTUM 64 +#define IXGBE_DCB_MAX_CREDIT_REFILL 200 /* 200 * 64B = 12800B */ +#define IXGBE_DCB_MAX_TSO_SIZE (32 * 1024) /* Max TSO pkt size in DCB*/ +#define IXGBE_DCB_MAX_CREDIT (2 * IXGBE_DCB_MAX_CREDIT_REFILL) + +/* 513 for 32KB TSO packet */ +#define IXGBE_DCB_MIN_TSO_CREDIT \ + ((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1) + +/* DCB configuration defines */ +#define IXGBE_DCB_MAX_USER_PRIORITY 8 +#define IXGBE_DCB_MAX_BW_GROUP 8 +#define IXGBE_DCB_BW_PERCENT 100 + +#define IXGBE_DCB_TX_CONFIG 0 +#define IXGBE_DCB_RX_CONFIG 1 + +/* DCB capability defines */ +#define IXGBE_DCB_PG_SUPPORT 0x00000001 +#define IXGBE_DCB_PFC_SUPPORT 0x00000002 +#define IXGBE_DCB_BCN_SUPPORT 0x00000004 +#define IXGBE_DCB_UP2TC_SUPPORT 0x00000008 +#define IXGBE_DCB_GSP_SUPPORT 0x00000010 + +struct ixgbe_dcb_support { + u32 capabilities; /* DCB capabilities */ + + /* Each bit represents a number of TCs configurable in the hw. + * If 8 traffic classes can be configured, the value is 0x80. */ + u8 traffic_classes; + u8 pfc_traffic_classes; +}; + +enum ixgbe_dcb_tsa { + ixgbe_dcb_tsa_ets = 0, + ixgbe_dcb_tsa_group_strict_cee, + ixgbe_dcb_tsa_strict +}; + +/* Traffic class bandwidth allocation per direction */ +struct ixgbe_dcb_tc_path { + u8 bwg_id; /* Bandwidth Group (BWG) ID */ + u8 bwg_percent; /* % of BWG's bandwidth */ + u8 link_percent; /* % of link bandwidth */ + u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */ + u16 data_credits_refill; /* Credit refill amount in 64B granularity */ + u16 data_credits_max; /* Max credits for a configured packet buffer + * in 64B granularity.*/ + enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */ +}; + +enum ixgbe_dcb_pfc { + ixgbe_dcb_pfc_disabled = 0, + ixgbe_dcb_pfc_enabled, + ixgbe_dcb_pfc_enabled_txonly, + ixgbe_dcb_pfc_enabled_rxonly +}; + +/* Traffic class configuration */ +struct ixgbe_dcb_tc_config { + struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */ + enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */ + + u16 desc_credits_max; /* For Tx Descriptor arbitration */ + u8 tc; /* Traffic class (TC) */ +}; + +enum ixgbe_dcb_pba { + /* PBA[0-7] each use 64KB FIFO */ + ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL, + /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ + ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED +}; + +struct ixgbe_dcb_num_tcs { + u8 pg_tcs; + u8 pfc_tcs; +}; + +struct ixgbe_dcb_config { + struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS]; + struct ixgbe_dcb_support support; + struct ixgbe_dcb_num_tcs num_tcs; + u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */ + bool pfc_mode_enable; + bool round_robin_enable; + + enum ixgbe_dcb_pba rx_pba_cfg; + + u32 dcb_cfg_version; /* Not used...OS-specific? */ + u32 link_speed; /* For bandwidth allocation validation purpose */ + bool vt_mode; +}; + +/* DCB driver APIs */ + +/* DCB rule checking */ +s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *); + +/* DCB credits calculation */ +s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int); +s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *, u32, u8); + +/* DCB PFC */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *); +s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB stats */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); + +/* DCB unpack routines */ +void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *); +void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *); +void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *); +void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *); +void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *); +void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *); + +/* DCB initialization */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *); +s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); +#endif /* _IXGBE_DCB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c new file mode 100644 index 00000000..11472bd3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -0,0 +1,2901 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool support for ixgbe */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef SIOCETHTOOL +#include + +#include "ixgbe.h" + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#define IXGBE_ALL_RAR_ENTRIES 16 + +#ifdef ETHTOOL_OPS_COMPAT +#include "kcompat_ethtool.c" +#endif +#ifdef ETHTOOL_GSTATS +struct ixgbe_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define IXGBE_NETDEV_STAT(_net_stat) { \ + .stat_string = #_net_stat, \ + .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ + .stat_offset = offsetof(struct net_device_stats, _net_stat) \ +} +static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = { + IXGBE_NETDEV_STAT(rx_packets), + IXGBE_NETDEV_STAT(tx_packets), + IXGBE_NETDEV_STAT(rx_bytes), + IXGBE_NETDEV_STAT(tx_bytes), + IXGBE_NETDEV_STAT(rx_errors), + IXGBE_NETDEV_STAT(tx_errors), + IXGBE_NETDEV_STAT(rx_dropped), + IXGBE_NETDEV_STAT(tx_dropped), + IXGBE_NETDEV_STAT(multicast), + IXGBE_NETDEV_STAT(collisions), + IXGBE_NETDEV_STAT(rx_over_errors), + IXGBE_NETDEV_STAT(rx_crc_errors), + IXGBE_NETDEV_STAT(rx_frame_errors), + IXGBE_NETDEV_STAT(rx_fifo_errors), + IXGBE_NETDEV_STAT(rx_missed_errors), + IXGBE_NETDEV_STAT(tx_aborted_errors), + IXGBE_NETDEV_STAT(tx_carrier_errors), + IXGBE_NETDEV_STAT(tx_fifo_errors), + IXGBE_NETDEV_STAT(tx_heartbeat_errors), +}; + +#define IXGBE_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \ + .stat_offset = offsetof(struct ixgbe_adapter, _stat) \ +} +static struct ixgbe_stats ixgbe_gstrings_stats[] = { + IXGBE_STAT("rx_pkts_nic", stats.gprc), + IXGBE_STAT("tx_pkts_nic", stats.gptc), + IXGBE_STAT("rx_bytes_nic", stats.gorc), + IXGBE_STAT("tx_bytes_nic", stats.gotc), + IXGBE_STAT("lsc_int", lsc_int), + IXGBE_STAT("tx_busy", tx_busy), + IXGBE_STAT("non_eop_descs", non_eop_descs), +#ifndef CONFIG_IXGBE_NAPI + IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog), +#endif + IXGBE_STAT("broadcast", stats.bprc), + IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) , + IXGBE_STAT("tx_timeout_count", tx_timeout_count), + IXGBE_STAT("tx_restart_queue", restart_queue), + IXGBE_STAT("rx_long_length_errors", stats.roc), + IXGBE_STAT("rx_short_length_errors", stats.ruc), + IXGBE_STAT("tx_flow_control_xon", stats.lxontxc), + IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc), + IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc), + IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc), + IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error), + IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed), + IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed), +#ifndef IXGBE_NO_LRO + IXGBE_STAT("lro_aggregated", lro_stats.coal), + IXGBE_STAT("lro_flushed", lro_stats.flushed), +#endif /* IXGBE_NO_LRO */ + IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources), + IXGBE_STAT("hw_rsc_aggregated", rsc_total_count), + IXGBE_STAT("hw_rsc_flushed", rsc_total_flush), +#ifdef HAVE_TX_MQ + IXGBE_STAT("fdir_match", stats.fdirmatch), + IXGBE_STAT("fdir_miss", stats.fdirmiss), + IXGBE_STAT("fdir_overflow", fdir_overflow), +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc), + IXGBE_STAT("fcoe_last_errors", stats.fclast), + IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc), + IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc), + IXGBE_STAT("rx_fcoe_dwords", stats.fcoedwrc), + IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp), + IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff), + IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc), + IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc), +#endif /* IXGBE_FCOE */ + IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc), + IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc), +}; + +#define IXGBE_QUEUE_STATS_LEN \ + ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \ + ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \ + (sizeof(struct ixgbe_queue_stats) / sizeof(u64))) +#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) +#define IXGBE_NETDEV_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_net_stats) +#define IXGBE_PB_STATS_LEN ( \ + (((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \ + IXGBE_FLAG_DCB_ENABLED) ? \ + (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \ + / sizeof(u64) : 0) +#define IXGBE_VF_STATS_LEN \ + ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \ + (sizeof(struct vf_stats) / sizeof(u64))) +#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \ + IXGBE_NETDEV_STATS_LEN + \ + IXGBE_PB_STATS_LEN + \ + IXGBE_QUEUE_STATS_LEN + \ + IXGBE_VF_STATS_LEN) + +#endif /* ETHTOOL_GSTATS */ +#ifdef ETHTOOL_TEST +static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", "Eeprom test (offline)", + "Interrupt test (offline)", "Loopback test (offline)", + "Link test (on/offline)" +}; +#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) +#endif /* ETHTOOL_TEST */ + +int ixgbe_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 link_speed = 0; + bool link_up; + + ecmd->supported = SUPPORTED_10000baseT_Full; + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->transceiver = XCVR_EXTERNAL; + if ((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber)) { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg); + switch (hw->mac.type) { + case ixgbe_mac_X540: + ecmd->supported |= SUPPORTED_100baseT_Full; + break; + default: + break; + } + + ecmd->advertising = ADVERTISED_Autoneg; + if (hw->phy.autoneg_advertised) { + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_100_FULL) + ecmd->advertising |= ADVERTISED_100baseT_Full; + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_10GB_FULL) + ecmd->advertising |= ADVERTISED_10000baseT_Full; + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_1GB_FULL) + ecmd->advertising |= ADVERTISED_1000baseT_Full; + } else { + /* + * Default advertised modes in case + * phy.autoneg_advertised isn't set. + */ + ecmd->advertising |= (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full); + if (hw->mac.type == ixgbe_mac_X540) + ecmd->advertising |= ADVERTISED_100baseT_Full; + } + + if (hw->phy.media_type == ixgbe_media_type_copper) { + ecmd->supported |= SUPPORTED_TP; + ecmd->advertising |= ADVERTISED_TP; + ecmd->port = PORT_TP; + } else { + ecmd->supported |= SUPPORTED_FIBRE; + ecmd->advertising |= ADVERTISED_FIBRE; + ecmd->port = PORT_FIBRE; + } + } else if (hw->phy.media_type == ixgbe_media_type_backplane) { + /* Set as FIBRE until SERDES defined in kernel */ + if (hw->device_id == IXGBE_DEV_ID_82598_BX) { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + ecmd->autoneg = AUTONEG_DISABLE; + } else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE) + || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_Autoneg | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + } else { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + } + } else { + ecmd->supported |= SUPPORTED_FIBRE; + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + ecmd->autoneg = AUTONEG_DISABLE; + } + +#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT + /* Get PHY type */ + switch (adapter->hw.phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_aq: + case ixgbe_phy_cu_unknown: + /* Copper 10G-BASET */ + ecmd->port = PORT_TP; + break; + case ixgbe_phy_qt: + ecmd->port = PORT_FIBRE; + break; + case ixgbe_phy_nl: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + switch (adapter->hw.phy.sfp_type) { + /* SFP+ devices, further checking needed */ + case ixgbe_sfp_type_da_cu: + case ixgbe_sfp_type_da_cu_core0: + case ixgbe_sfp_type_da_cu_core1: + ecmd->port = PORT_DA; + break; + case ixgbe_sfp_type_sr: + case ixgbe_sfp_type_lr: + case ixgbe_sfp_type_srlr_core0: + case ixgbe_sfp_type_srlr_core1: + ecmd->port = PORT_FIBRE; + break; + case ixgbe_sfp_type_not_present: + ecmd->port = PORT_NONE; + break; + case ixgbe_sfp_type_1g_cu_core0: + case ixgbe_sfp_type_1g_cu_core1: + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP; + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_TP); + break; + case ixgbe_sfp_type_1g_sx_core0: + case ixgbe_sfp_type_1g_sx_core1: + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + break; + case ixgbe_sfp_type_unknown: + default: + ecmd->port = PORT_OTHER; + break; + } + break; + case ixgbe_phy_xaui: + ecmd->port = PORT_NONE; + break; + case ixgbe_phy_unknown: + case ixgbe_phy_generic: + case ixgbe_phy_sfp_unsupported: + default: + ecmd->port = PORT_OTHER; + break; + } +#endif + + if (!in_interrupt()) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + } else { + /* + * this case is a special workaround for RHEL5 bonding + * that calls this routine from interrupt context + */ + link_speed = adapter->link_speed; + link_up = adapter->link_up; + } + + if (link_up) { + switch (link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ecmd->speed = SPEED_10000; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ecmd->speed = SPEED_1000; + break; + case IXGBE_LINK_SPEED_100_FULL: + ecmd->speed = SPEED_100; + break; + default: + break; + } + ecmd->duplex = DUPLEX_FULL; + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + return 0; +} + +static int ixgbe_set_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 advertised, old; + s32 err = 0; + + if ((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber)) { + /* + * this function does not support duplex forcing, but can + * limit the advertising of the adapter to the specified speed + */ + if (ecmd->autoneg == AUTONEG_DISABLE) + return -EINVAL; + + if (ecmd->advertising & ~ecmd->supported) + return -EINVAL; + + old = hw->phy.autoneg_advertised; + advertised = 0; + if (ecmd->advertising & ADVERTISED_10000baseT_Full) + advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (ecmd->advertising & ADVERTISED_1000baseT_Full) + advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (ecmd->advertising & ADVERTISED_100baseT_Full) + advertised |= IXGBE_LINK_SPEED_100_FULL; + + if (old == advertised) + return err; + /* this sets the link speed and restarts auto-neg */ + hw->mac.autotry_restart = true; + err = hw->mac.ops.setup_link(hw, advertised, true, true); + if (err) { + e_info(probe, "setup link failed with code %d\n", err); + hw->mac.ops.setup_link(hw, old, true, true); + } + } + return err; +} + +static void ixgbe_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + if (hw->fc.disable_fc_autoneg) + pause->autoneg = 0; + else + pause->autoneg = 1; + + if (hw->fc.current_mode == ixgbe_fc_rx_pause) { + pause->rx_pause = 1; + } else if (hw->fc.current_mode == ixgbe_fc_tx_pause) { + pause->tx_pause = 1; + } else if (hw->fc.current_mode == ixgbe_fc_full) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +static int ixgbe_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fc_info fc = hw->fc; + + /* 82598 does no support link flow control with DCB enabled */ + if ((hw->mac.type == ixgbe_mac_82598EB) && + (adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + return -EINVAL; + + fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE); + + if ((pause->rx_pause && pause->tx_pause) || pause->autoneg) + fc.requested_mode = ixgbe_fc_full; + else if (pause->rx_pause) + fc.requested_mode = ixgbe_fc_rx_pause; + else if (pause->tx_pause) + fc.requested_mode = ixgbe_fc_tx_pause; + else + fc.requested_mode = ixgbe_fc_none; + + /* if the thing changed then we'll update and use new autoneg */ + if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) { + hw->fc = fc; + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + else + ixgbe_reset(adapter); + } + + return 0; +} + +static u32 ixgbe_get_msglevel(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + return adapter->msg_enable; +} + +static void ixgbe_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + adapter->msg_enable = data; +} + +static int ixgbe_get_regs_len(struct net_device *netdev) +{ +#define IXGBE_REGS_LEN 1129 + return IXGBE_REGS_LEN * sizeof(u32); +} + +#define IXGBE_GET_STAT(_A_, _R_) (_A_->stats._R_) + + +static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs, + void *p) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 *regs_buff = p; + u8 i; + + printk(KERN_DEBUG "ixgbe_get_regs_1\n"); + memset(p, 0, IXGBE_REGS_LEN * sizeof(u32)); + printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr); + + regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id; + + /* General Registers */ + regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL); + printk(KERN_DEBUG "ixgbe_get_regs_3\n"); + regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS); + regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP); + regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP); + regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER); + regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER); + + printk(KERN_DEBUG "ixgbe_get_regs_4\n"); + + /* NVM Register */ + regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC); + regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD); + regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA); + regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL); + regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA); + regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL); + regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA); + regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT); + regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP); + regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC); + + /* Interrupt */ + /* don't read EICR because it can clear interrupt causes, instead + * read EICS which is a shadow but doesn't clear EICR */ + regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS); + regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS); + regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS); + regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC); + regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC); + regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM); + regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0)); + regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0)); + regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT); + regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA); + regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0)); + regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE); + + /* Flow Control */ + regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP); + regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0)); + regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1)); + regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2)); + regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3)); + for (i = 0; i < 8; i++) { + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i)); + regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i)); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + regs_buff[35 + i] = IXGBE_READ_REG(hw, + IXGBE_FCRTL_82599(i)); + regs_buff[43 + i] = IXGBE_READ_REG(hw, + IXGBE_FCRTH_82599(i)); + break; + default: + break; + } + } + regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV); + regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS); + + /* Receive DMA */ + for (i = 0; i < 64; i++) + regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i)); + for (i = 0; i < 64; i++) + regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i)); + for (i = 0; i < 64; i++) + regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i)); + for (i = 0; i < 64; i++) + regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i)); + for (i = 0; i < 64; i++) + regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i)); + for (i = 0; i < 64; i++) + regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + for (i = 0; i < 16; i++) + regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); + for (i = 0; i < 16; i++) + regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + for (i = 0; i < 8; i++) + regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); + regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN); + + /* Receive */ + regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL); + for (i = 0; i < 16; i++) + regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i)); + for (i = 0; i < 16; i++) + regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i)); + regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)); + regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL); + regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL); + regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC); + regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL); + for (i = 0; i < 8; i++) + regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i)); + for (i = 0; i < 8; i++) + regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i)); + regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP); + + /* Transmit */ + for (i = 0; i < 32; i++) + regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i)); + for (i = 0; i < 32; i++) + regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i)); + for (i = 0; i < 32; i++) + regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i)); + for (i = 0; i < 32; i++) + regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i)); + for (i = 0; i < 32; i++) + regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i)); + for (i = 0; i < 32; i++) + regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); + for (i = 0; i < 32; i++) + regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i)); + for (i = 0; i < 32; i++) + regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i)); + regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL); + for (i = 0; i < 16; i++) + regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG); + for (i = 0; i < 8; i++) + regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i)); + regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP); + + /* Wake Up */ + regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC); + regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC); + regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS); + regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV); + regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT); + regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT); + regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL); + regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM); + regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0)); + + /* DCB */ + regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS); + regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS); + regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS); + regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR); + for (i = 0; i < 8; i++) + regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i)); + for (i = 0; i < 8; i++) + regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i)); + for (i = 0; i < 8; i++) + regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i)); + for (i = 0; i < 8; i++) + regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i)); + for (i = 0; i < 8; i++) + regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i)); + for (i = 0; i < 8; i++) + regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i)); + + /* Statistics */ + regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs); + regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc); + regs_buff[883] = IXGBE_GET_STAT(adapter, errbc); + regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc); + for (i = 0; i < 8; i++) + regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]); + regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc); + regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc); + regs_buff[895] = IXGBE_GET_STAT(adapter, rlec); + regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc); + regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc); + regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc); + regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc); + for (i = 0; i < 8; i++) + regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]); + for (i = 0; i < 8; i++) + regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]); + for (i = 0; i < 8; i++) + regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]); + for (i = 0; i < 8; i++) + regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]); + regs_buff[932] = IXGBE_GET_STAT(adapter, prc64); + regs_buff[933] = IXGBE_GET_STAT(adapter, prc127); + regs_buff[934] = IXGBE_GET_STAT(adapter, prc255); + regs_buff[935] = IXGBE_GET_STAT(adapter, prc511); + regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023); + regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522); + regs_buff[938] = IXGBE_GET_STAT(adapter, gprc); + regs_buff[939] = IXGBE_GET_STAT(adapter, bprc); + regs_buff[940] = IXGBE_GET_STAT(adapter, mprc); + regs_buff[941] = IXGBE_GET_STAT(adapter, gptc); + regs_buff[942] = IXGBE_GET_STAT(adapter, gorc); + regs_buff[944] = IXGBE_GET_STAT(adapter, gotc); + for (i = 0; i < 8; i++) + regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]); + regs_buff[954] = IXGBE_GET_STAT(adapter, ruc); + regs_buff[955] = IXGBE_GET_STAT(adapter, rfc); + regs_buff[956] = IXGBE_GET_STAT(adapter, roc); + regs_buff[957] = IXGBE_GET_STAT(adapter, rjc); + regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc); + regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc); + regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc); + regs_buff[961] = IXGBE_GET_STAT(adapter, tor); + regs_buff[963] = IXGBE_GET_STAT(adapter, tpr); + regs_buff[964] = IXGBE_GET_STAT(adapter, tpt); + regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64); + regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127); + regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255); + regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511); + regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023); + regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522); + regs_buff[971] = IXGBE_GET_STAT(adapter, mptc); + regs_buff[972] = IXGBE_GET_STAT(adapter, bptc); + regs_buff[973] = IXGBE_GET_STAT(adapter, xec); + for (i = 0; i < 16; i++) + regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]); + for (i = 0; i < 16; i++) + regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]); + for (i = 0; i < 16; i++) + regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]); + for (i = 0; i < 16; i++) + regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]); + + /* MAC */ + regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG); + regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); + regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); + regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0); + regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1); + regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); + regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP); + regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP); + regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0); + regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1); + regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP); + regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA); + regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE); + regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD); + regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS); + regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA); + regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD); + regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD); + regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD); + regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG); + regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1); + regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2); + regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS); + regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC); + regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS); + regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC); + regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS); + regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3); + regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1); + regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2); + regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); + + /* Diagnostic */ + regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL); + for (i = 0; i < 8; i++) + regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i)); + regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN); + for (i = 0; i < 4; i++) + regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i)); + regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE); + regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL); + for (i = 0; i < 8; i++) + regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i)); + regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN); + for (i = 0; i < 4; i++) + regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i)); + regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE); + regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL); + regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0); + regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1); + regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2); + regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3); + regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL); + regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0); + regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1); + regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2); + regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3); + for (i = 0; i < 8; i++) + regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i)); + regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL); + regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1); + regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2); + regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1); + regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2); + regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS); + regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL); + regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC); + regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC); + + /* 82599 X540 specific registers */ + regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN); +} + +static int ixgbe_get_eeprom_len(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + return adapter->hw.eeprom.word_size * 2; +} + +static int ixgbe_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u16 *eeprom_buff; + int first_word, last_word, eeprom_len; + int ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_len = last_word - first_word + 1; + + eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len, + eeprom_buff); + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < eeprom_len; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); + kfree(eeprom_buff); + + return ret_val; +} + +static int ixgbe_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u16 *eeprom_buff; + void *ptr; + int max_len, first_word, last_word, ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) + return -EINVAL; + + max_len = hw->eeprom.word_size * 2; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(max_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ptr = eeprom_buff; + + if (eeprom->offset & 1) { + /* + * need read/modify/write of first changed EEPROM word + * only the second byte of the word is being modified + */ + ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]); + if (ret_val) + goto err; + + ptr++; + } + if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { + /* + * need read/modify/write of last changed EEPROM word + * only the first byte of the word is being modified + */ + ret_val = ixgbe_read_eeprom(hw, last_word, + &eeprom_buff[last_word - first_word]); + if (ret_val) + goto err; + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(ptr, bytes, eeprom->len); + + for (i = 0; i < last_word - first_word + 1; i++) + cpu_to_le16s(&eeprom_buff[i]); + + ret_val = ixgbe_write_eeprom_buffer(hw, first_word, + last_word - first_word + 1, + eeprom_buff); + + /* Update the checksum */ + if (ret_val == 0) + ixgbe_update_eeprom_checksum(hw); + +err: + kfree(eeprom_buff); + return ret_val; +} + +static void ixgbe_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); + + strlcpy(drvinfo->version, ixgbe_driver_version, + sizeof(drvinfo->version)); + + strlcpy(drvinfo->fw_version, adapter->eeprom_id, + sizeof(drvinfo->fw_version)); + + strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_stats = IXGBE_STATS_LEN; + drvinfo->testinfo_len = IXGBE_TEST_LEN; + drvinfo->regdump_len = ixgbe_get_regs_len(netdev); +} + +static void ixgbe_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IXGBE_MAX_RXD; + ring->tx_max_pending = IXGBE_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->rx_ring_count; + ring->tx_pending = adapter->tx_ring_count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int ixgbe_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL; + u32 new_rx_count, new_tx_count; + int i, err = 0; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_tx_count = clamp_t(u32, ring->tx_pending, + IXGBE_MIN_TXD, IXGBE_MAX_TXD); + new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE); + + new_rx_count = clamp_t(u32, ring->rx_pending, + IXGBE_MIN_RXD, IXGBE_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); + + /* if nothing to do return success */ + if ((new_tx_count == adapter->tx_ring_count) && + (new_rx_count == adapter->rx_ring_count)) + return 0; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (!netif_running(adapter->netdev)) { + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->count = new_rx_count; + adapter->tx_ring_count = new_tx_count; + adapter->rx_ring_count = new_rx_count; + goto clear_reset; + } + + /* alloc updated Tx resources */ + if (new_tx_count != adapter->tx_ring_count) { + tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring)); + if (!tx_ring) { + err = -ENOMEM; + goto clear_reset; + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + /* clone ring and setup updated count */ + tx_ring[i] = *adapter->tx_ring[i]; + tx_ring[i].count = new_tx_count; + err = ixgbe_setup_tx_resources(&tx_ring[i]); + if (err) { + while (i) { + i--; + ixgbe_free_tx_resources(&tx_ring[i]); + } + + vfree(tx_ring); + tx_ring = NULL; + + goto clear_reset; + } + } + } + + /* alloc updated Rx resources */ + if (new_rx_count != adapter->rx_ring_count) { + rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring)); + if (!rx_ring) { + err = -ENOMEM; + goto clear_reset; + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + /* clone ring and setup updated count */ + rx_ring[i] = *adapter->rx_ring[i]; + rx_ring[i].count = new_rx_count; + err = ixgbe_setup_rx_resources(&rx_ring[i]); + if (err) { + while (i) { + i--; + ixgbe_free_rx_resources(&rx_ring[i]); + } + + vfree(rx_ring); + rx_ring = NULL; + + goto clear_reset; + } + } + } + + /* bring interface down to prepare for update */ + ixgbe_down(adapter); + + /* Tx */ + if (tx_ring) { + for (i = 0; i < adapter->num_tx_queues; i++) { + ixgbe_free_tx_resources(adapter->tx_ring[i]); + *adapter->tx_ring[i] = tx_ring[i]; + } + adapter->tx_ring_count = new_tx_count; + + vfree(tx_ring); + tx_ring = NULL; + } + + /* Rx */ + if (rx_ring) { + for (i = 0; i < adapter->num_rx_queues; i++) { + ixgbe_free_rx_resources(adapter->rx_ring[i]); + *adapter->rx_ring[i] = rx_ring[i]; + } + adapter->rx_ring_count = new_rx_count; + + vfree(rx_ring); + rx_ring = NULL; + } + + /* restore interface using new values */ + ixgbe_up(adapter); + +clear_reset: + /* free Tx resources if Rx error is encountered */ + if (tx_ring) { + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbe_free_tx_resources(&tx_ring[i]); + vfree(tx_ring); + } + + clear_bit(__IXGBE_RESETTING, &adapter->state); + return err; +} + +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT +static int ixgbe_get_stats_count(struct net_device *netdev) +{ + return IXGBE_STATS_LEN; +} + +#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static int ixgbe_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_TEST: + return IXGBE_TEST_LEN; + case ETH_SS_STATS: + return IXGBE_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static void ixgbe_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif + u64 *queue_stat; + int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); + int i, j, k; + char *p; + + printk(KERN_DEBUG "ixgbe_stats 0\n"); + ixgbe_update_stats(adapter); + printk(KERN_DEBUG "ixgbe_stats 1\n"); + + for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { + p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset; + data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) { + p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset; + data[i] = (ixgbe_gstrings_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + printk(KERN_DEBUG "ixgbe_stats 2\n"); +#ifdef NO_VNIC + for (j = 0; j < adapter->num_tx_queues; j++) { + queue_stat = (u64 *)&adapter->tx_ring[j]->stats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + i += k; + } + for (j = 0; j < adapter->num_rx_queues; j++) { + queue_stat = (u64 *)&adapter->rx_ring[j]->stats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + i += k; + } + printk(KERN_DEBUG "ixgbe_stats 3\n"); +#endif + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxontxc[j]; + data[i++] = adapter->stats.pxofftxc[j]; + } + for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxonrxc[j]; + data[i++] = adapter->stats.pxoffrxc[j]; + } + } + printk(KERN_DEBUG "ixgbe_stats 4\n"); + stat_count = sizeof(struct vf_stats) / sizeof(u64); + for (j = 0; j < adapter->num_vfs; j++) { + queue_stat = (u64 *)&adapter->vfinfo[j].vfstats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats; + for (k = 0; k < stat_count; k++) + data[i + k] += queue_stat[k]; + i += k; + } +} + +static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, + u8 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + char *p = (char *)data; + int i; + + switch (stringset) { + case ETH_SS_TEST: + memcpy(data, *ixgbe_gstrings_test, + IXGBE_TEST_LEN * ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: + for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { + memcpy(p, ixgbe_gstrings_net_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { + memcpy(p, ixgbe_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + sprintf(p, "tx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) { + sprintf(p, "rx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + } + for (i = 0; i < adapter->num_vfs; i++) { + sprintf(p, "VF %d Rx Packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Rx Bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Tx Packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Tx Bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d MC Packets", i); + p += ETH_GSTRING_LEN; + } + /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ + break; + } +} + +static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct ixgbe_hw *hw = &adapter->hw; + bool link_up; + u32 link_speed = 0; + *data = 0; + + hw->mac.ops.check_link(hw, &link_speed, &link_up, true); + if (link_up) + return *data; + else + *data = 1; + return *data; +} + +/* ethtool register test data */ +struct ixgbe_reg_test { + u16 reg; + u8 array_len; + u8 test_type; + u32 mask; + u32 write; +}; + +/* In the hardware, registers are laid out either singly, in arrays + * spaced 0x40 bytes apart, or in contiguous tables. We assume + * most tests take place on arrays or single registers (handled + * as a single-element array) and special-case the tables. + * Table tests are always pattern tests. + * + * We also make provision for some required setup steps by specifying + * registers to be written without any read-back testing. + */ + +#define PATTERN_TEST 1 +#define SET_READ_TEST 2 +#define WRITE_NO_TEST 3 +#define TABLE32_TEST 4 +#define TABLE64_TEST_LO 5 +#define TABLE64_TEST_HI 6 + +/* default 82599 register test */ +static struct ixgbe_reg_test reg_test_82599[] = { + { IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, + { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 }, + { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, + { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 }, + { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 }, + { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF }, + { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* default 82598 register test */ +static struct ixgbe_reg_test reg_test_82598[] = { + { IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, + { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* Enable all four RX queues before testing. */ + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, + /* RDH is read-only for 82598, only test RDT. */ + { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF }, + { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 }, + { IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 }, + { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF }, + { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +#define REG_PATTERN_TEST(R, M, W) \ +{ \ + u32 pat, val, before; \ + const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \ + for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { \ + before = readl(adapter->hw.hw_addr + R); \ + writel((_test[pat] & W), (adapter->hw.hw_addr + R)); \ + val = readl(adapter->hw.hw_addr + R); \ + if (val != (_test[pat] & W & M)) { \ + e_err(drv, "pattern test reg %04X failed: got " \ + "0x%08X expected 0x%08X\n", \ + R, val, (_test[pat] & W & M)); \ + *data = R; \ + writel(before, adapter->hw.hw_addr + R); \ + return 1; \ + } \ + writel(before, adapter->hw.hw_addr + R); \ + } \ +} + +#define REG_SET_AND_CHECK(R, M, W) \ +{ \ + u32 val, before; \ + before = readl(adapter->hw.hw_addr + R); \ + writel((W & M), (adapter->hw.hw_addr + R)); \ + val = readl(adapter->hw.hw_addr + R); \ + if ((W & M) != (val & M)) { \ + e_err(drv, "set/check reg %04X test failed: got 0x%08X " \ + "expected 0x%08X\n", R, (val & M), (W & M)); \ + *data = R; \ + writel(before, (adapter->hw.hw_addr + R)); \ + return 1; \ + } \ + writel(before, (adapter->hw.hw_addr + R)); \ +} + +static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct ixgbe_reg_test *test; + u32 value, status_before, status_after; + u32 i, toggle; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + toggle = 0x7FFFF3FF; + test = reg_test_82598; + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + toggle = 0x7FFFF30F; + test = reg_test_82599; + break; + default: + *data = 1; + return 1; + break; + } + + /* + * Because the status register is such a special case, + * we handle it separately from the rest of the register + * tests. Some bits are read-only, some toggle, and some + * are writeable on newer MACs. + */ + status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS); + value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle); + status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle; + if (value != status_after) { + e_err(drv, "failed STATUS register test got: " + "0x%08X expected: 0x%08X\n", status_after, value); + *data = 1; + return 1; + } + /* restore previous status */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before); + + /* + * Perform the remainder of the register test, looping through + * the test table until we either fail or reach the null entry. + */ + while (test->reg) { + for (i = 0; i < test->array_len; i++) { + switch (test->test_type) { + case PATTERN_TEST: + REG_PATTERN_TEST(test->reg + (i * 0x40), + test->mask, + test->write); + break; + case SET_READ_TEST: + REG_SET_AND_CHECK(test->reg + (i * 0x40), + test->mask, + test->write); + break; + case WRITE_NO_TEST: + writel(test->write, + (adapter->hw.hw_addr + test->reg) + + (i * 0x40)); + break; + case TABLE32_TEST: + REG_PATTERN_TEST(test->reg + (i * 4), + test->mask, + test->write); + break; + case TABLE64_TEST_LO: + REG_PATTERN_TEST(test->reg + (i * 8), + test->mask, + test->write); + break; + case TABLE64_TEST_HI: + REG_PATTERN_TEST((test->reg + 4) + (i * 8), + test->mask, + test->write); + break; + } + } + test++; + } + + *data = 0; + return 0; +} + +static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) +{ + if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL)) + *data = 1; + else + *data = 0; + return *data; +} + +static irqreturn_t ixgbe_test_intr(int irq, void *data) +{ + struct net_device *netdev = (struct net_device *) data; + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); + + return IRQ_HANDLED; +} + +static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct net_device *netdev = adapter->netdev; + u32 mask, i = 0, shared_int = true; + u32 irq = adapter->pdev->irq; + + *data = 0; + + /* Hook up test interrupt handler just for this test */ + if (adapter->msix_entries) { + /* NOTE: we don't test MSI-X interrupts here, yet */ + return 0; + } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { + shared_int = false; + if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name, + netdev)) { + *data = 1; + return -1; + } + } else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED, + netdev->name, netdev)) { + shared_int = false; + } else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED, + netdev->name, netdev)) { + *data = 1; + return -1; + } + e_info(hw, "testing %s interrupt\n", + (shared_int ? "shared" : "unshared")); + + /* Disable all the interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + /* Test each interrupt */ + for (; i < 10; i++) { + /* Interrupt to test */ + mask = 1 << i; + + if (!shared_int) { + /* + * Disable the interrupts to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, + ~mask & 0x00007FFF); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, + ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 3; + break; + } + } + + /* + * Enable the interrupt to be reported in the cause + * register and then force the same interrupt and see + * if one gets posted. If an interrupt was not posted + * to the bus, the test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (!(adapter->test_icr & mask)) { + *data = 4; + break; + } + + if (!shared_int) { + /* + * Disable the other interrupts to be reported in + * the cause register and then force the other + * interrupts and see if any get posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, + ~mask & 0x00007FFF); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, + ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (adapter->test_icr) { + *data = 5; + break; + } + } + } + + /* Disable all the interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + /* Unhook test interrupt handler */ + free_irq(irq, netdev); + + return *data; +} + + + +static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_data; + + /* X540 needs to set the MACC.FLU bit to force link up */ + if (adapter->hw.mac.type == ixgbe_mac_X540) { + reg_data = IXGBE_READ_REG(hw, IXGBE_MACC); + reg_data |= IXGBE_MACC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data); + } + + /* right now we only support MAC loopback in the driver */ + reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0); + /* Setup MAC loopback */ + reg_data |= IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data); + + reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL); + reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data); + + reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC); + reg_data &= ~IXGBE_AUTOC_LMS_MASK; + reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data); + IXGBE_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Disable Atlas Tx lanes; re-enabled in reset path */ + if (hw->mac.type == ixgbe_mac_82598EB) { + u8 atlas; + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_REG_EN; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas); + } + + return 0; +} + +static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter) +{ + u32 reg_data; + + reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0); + reg_data &= ~IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data); +} + + + + + + +static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data) +{ + + //*data = ixgbe_setup_desc_rings(adapter); + //if (*data) + // goto out; + *data = ixgbe_setup_loopback_test(adapter); + if (*data) + goto err_loopback; + //*data = ixgbe_run_loopback_test(adapter); + ixgbe_loopback_cleanup(adapter); + +err_loopback: + //ixgbe_free_desc_rings(adapter); +//out: + return *data; + +} + +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT +static int ixgbe_diag_test_count(struct net_device *netdev) +{ + return IXGBE_TEST_LEN; +} + +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static void ixgbe_diag_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + bool if_running = netif_running(netdev); + + set_bit(__IXGBE_TESTING, &adapter->state); + if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + /* Offline tests */ + + e_info(hw, "offline testing starting\n"); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result */ + if (ixgbe_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + int i; + for (i = 0; i < adapter->num_vfs; i++) { + if (adapter->vfinfo[i].clear_to_send) { + e_warn(drv, "Please take active VFS " + "offline and restart the " + "adapter before running NIC " + "diagnostics\n"); + data[0] = 1; + data[1] = 1; + data[2] = 1; + data[3] = 1; + eth_test->flags |= ETH_TEST_FL_FAILED; + clear_bit(__IXGBE_TESTING, + &adapter->state); + goto skip_ol_tests; + } + } + } + + if (if_running) + /* indicate we're in test mode */ + dev_close(netdev); + else + ixgbe_reset(adapter); + + e_info(hw, "register testing starting\n"); + if (ixgbe_reg_test(adapter, &data[0])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + ixgbe_reset(adapter); + e_info(hw, "eeprom testing starting\n"); + if (ixgbe_eeprom_test(adapter, &data[1])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + ixgbe_reset(adapter); + e_info(hw, "interrupt testing starting\n"); + if (ixgbe_intr_test(adapter, &data[2])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* If SRIOV or VMDq is enabled then skip MAC + * loopback diagnostic. */ + if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED | + IXGBE_FLAG_VMDQ_ENABLED)) { + e_info(hw, "skip MAC loopback diagnostic in VT mode\n"); + data[3] = 0; + goto skip_loopback; + } + + ixgbe_reset(adapter); + e_info(hw, "loopback testing starting\n"); + if (ixgbe_loopback_test(adapter, &data[3])) + eth_test->flags |= ETH_TEST_FL_FAILED; + +skip_loopback: + ixgbe_reset(adapter); + + clear_bit(__IXGBE_TESTING, &adapter->state); + if (if_running) + dev_open(netdev); + } else { + e_info(hw, "online testing starting\n"); + /* Online tests */ + if (ixgbe_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* Online tests aren't run; pass by default */ + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + + clear_bit(__IXGBE_TESTING, &adapter->state); + } +skip_ol_tests: + msleep_interruptible(4 * 1000); +} + +static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter, + struct ethtool_wolinfo *wol) +{ + struct ixgbe_hw *hw = &adapter->hw; + int retval = 1; + u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; + + /* WOL not supported except for the following */ + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_SFP: + /* Only these subdevice could supports WOL */ + switch (hw->subsystem_device_id) { + case IXGBE_SUBDEV_ID_82599_560FLR: + /* only support first port */ + if (hw->bus.func != 0) { + wol->supported = 0; + break; + } + case IXGBE_SUBDEV_ID_82599_SFP: + retval = 0; + break; + default: + wol->supported = 0; + break; + } + break; + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + /* All except this subdevice support WOL */ + if (hw->subsystem_device_id == + IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) { + wol->supported = 0; + break; + } + retval = 0; + break; + case IXGBE_DEV_ID_82599_KX4: + retval = 0; + break; + case IXGBE_DEV_ID_X540T: + /* check eeprom to see if enabled wol */ + if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && + (hw->bus.func == 0))) { + retval = 0; + break; + } + + /* All others not supported */ + wol->supported = 0; + break; + default: + wol->supported = 0; + } + return retval; +} + +static void ixgbe_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC; + wol->wolopts = 0; + + if (ixgbe_wol_exclusion(adapter, wol) || + !device_can_wakeup(&adapter->pdev->dev)) + return; + + if (adapter->wol & IXGBE_WUFC_EX) + wol->wolopts |= WAKE_UCAST; + if (adapter->wol & IXGBE_WUFC_MC) + wol->wolopts |= WAKE_MCAST; + if (adapter->wol & IXGBE_WUFC_BC) + wol->wolopts |= WAKE_BCAST; + if (adapter->wol & IXGBE_WUFC_MAG) + wol->wolopts |= WAKE_MAGIC; +} + +static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE)) + return -EOPNOTSUPP; + + if (ixgbe_wol_exclusion(adapter, wol)) + return wol->wolopts ? -EOPNOTSUPP : 0; + + adapter->wol = 0; + + if (wol->wolopts & WAKE_UCAST) + adapter->wol |= IXGBE_WUFC_EX; + if (wol->wolopts & WAKE_MCAST) + adapter->wol |= IXGBE_WUFC_MC; + if (wol->wolopts & WAKE_BCAST) + adapter->wol |= IXGBE_WUFC_BC; + if (wol->wolopts & WAKE_MAGIC) + adapter->wol |= IXGBE_WUFC_MAG; + + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + +static int ixgbe_nway_reset(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + + return 0; +} + +#ifdef HAVE_ETHTOOL_SET_PHYS_ID +static int ixgbe_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + return 2; + + case ETHTOOL_ID_ON: + hw->mac.ops.led_on(hw, IXGBE_LED_ON); + break; + + case ETHTOOL_ID_OFF: + hw->mac.ops.led_off(hw, IXGBE_LED_ON); + break; + + case ETHTOOL_ID_INACTIVE: + /* Restore LED settings */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg); + break; + } + + return 0; +} +#else +static int ixgbe_phys_id(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + u32 i; + + if (!data || data > 300) + data = 300; + + for (i = 0; i < (data * 1000); i += 400) { + ixgbe_led_on(hw, IXGBE_LED_ON); + msleep_interruptible(200); + ixgbe_led_off(hw, IXGBE_LED_ON); + msleep_interruptible(200); + } + + /* Restore LED settings */ + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + + return 0; +} +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ + +static int ixgbe_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; +#ifndef CONFIG_IXGBE_NAPI + ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit; +#endif /* CONFIG_IXGBE_NAPI */ + /* only valid if in constant ITR mode */ + if (adapter->rx_itr_setting <= 1) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; + + /* if in mixed tx/rx queues per vector mode, report only rx settings */ + if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) + return 0; + + /* only valid if in constant ITR mode */ + if (adapter->tx_itr_setting <= 1) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; + + return 0; +} + +/* + * this function must be called before setting the new value of + * rx_itr_setting + */ +#ifdef NO_VNIC +static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + /* nothing to do if LRO or RSC are not enabled */ + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) || + !(netdev->features & NETIF_F_LRO)) + return false; + + /* check the feature flag value and enable RSC if necessary */ + if (adapter->rx_itr_setting == 1 || + adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + e_info(probe, "rx-usecs value high enough " + "to re-enable RSC\n"); + return true; + } + /* if interrupt rate is too high then disable RSC */ + } else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; +#ifdef IXGBE_NO_LRO + e_info(probe, "rx-usecs set too low, disabling RSC\n"); +#else + e_info(probe, "rx-usecs set too low, " + "falling back to software LRO\n"); +#endif + return true; + } + return false; +} +#endif + +static int ixgbe_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ +#ifdef NO_VNIC + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_q_vector *q_vector; + int i; + int num_vectors; + u16 tx_itr_param, rx_itr_param; + bool need_reset = false; + + /* don't accept tx specific changes if we've got mixed RxTx vectors */ + if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count + && ec->tx_coalesce_usecs) + return -EINVAL; + + if (ec->tx_max_coalesced_frames_irq) + adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; + +#ifndef CONFIG_IXGBE_NAPI + if (ec->rx_max_coalesced_frames_irq) + adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq; + +#endif + if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) || + (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2))) + return -EINVAL; + + if (ec->rx_coalesce_usecs > 1) + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs; + + if (adapter->rx_itr_setting == 1) + rx_itr_param = IXGBE_20K_ITR; + else + rx_itr_param = adapter->rx_itr_setting; + + if (ec->tx_coalesce_usecs > 1) + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs; + + if (adapter->tx_itr_setting == 1) + tx_itr_param = IXGBE_10K_ITR; + else + tx_itr_param = adapter->tx_itr_setting; + + /* check the old value and enable RSC if necessary */ + need_reset = ixgbe_update_rsc(adapter); + + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + else + num_vectors = 1; + + for (i = 0; i < num_vectors; i++) { + q_vector = adapter->q_vector[i]; + q_vector->tx.work_limit = adapter->tx_work_limit; + q_vector->rx.work_limit = adapter->rx_work_limit; + if (q_vector->tx.count && !q_vector->rx.count) + /* tx only */ + q_vector->itr = tx_itr_param; + else + /* rx only or mixed */ + q_vector->itr = rx_itr_param; + ixgbe_write_eitr(q_vector); + } + + /* + * do reset here at the end to make sure EITR==0 case is handled + * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings + * also locks in RSC enable/disable which requires reset + */ + if (need_reset) + ixgbe_do_reset(netdev); +#endif + return 0; +} + +#ifndef HAVE_NDO_SET_FEATURES +static u32 ixgbe_get_rx_csum(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_ring *ring = adapter->rx_ring[0]; + return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); +} + +static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + if (data) + set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); + else + clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); + } + + /* LRO and RSC both depend on RX checksum to function */ + if (!data && (netdev->features & NETIF_F_LRO)) { + netdev->features &= ~NETIF_F_LRO; + + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + ixgbe_do_reset(netdev); + } + } + + return 0; +} + +static u32 ixgbe_get_tx_csum(struct net_device *netdev) +{ + return (netdev->features & NETIF_F_IP_CSUM) != 0; +} + +static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 feature_list; + +#ifdef NETIF_F_IPV6_CSUM + feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; +#else + feature_list = NETIF_F_IP_CSUM; +#endif + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + feature_list |= NETIF_F_SCTP_CSUM; + break; + default: + break; + } + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +#ifdef NETIF_F_TSO +static int ixgbe_set_tso(struct net_device *netdev, u32 data) +{ + if (data) { + netdev->features |= NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; +#endif + } else { +#ifndef HAVE_NETDEV_VLAN_FEATURES +#ifdef NETIF_F_HW_VLAN_TX + struct ixgbe_adapter *adapter = netdev_priv(netdev); + /* disable TSO on all VLANs if they're present */ + if (adapter->vlgrp) { + int i; + struct net_device *v_netdev; + for (i = 0; i < VLAN_N_VID; i++) { + v_netdev = + vlan_group_get_device(adapter->vlgrp, i); + if (v_netdev) { + v_netdev->features &= ~NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + v_netdev->features &= ~NETIF_F_TSO6; +#endif + vlan_group_set_device(adapter->vlgrp, i, + v_netdev); + } + } + } +#endif +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + netdev->features &= ~NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features &= ~NETIF_F_TSO6; +#endif + } + return 0; +} + +#endif /* NETIF_F_TSO */ +#ifdef ETHTOOL_GFLAGS +static int ixgbe_set_flags(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN; + u32 changed = netdev->features ^ data; + bool need_reset = false; + int rc; + +#ifndef HAVE_VLAN_RX_REGISTER + if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && + !(data & ETH_FLAG_RXVLAN)) + return -EINVAL; + +#endif +#ifdef NETIF_F_RXHASH + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) + supported_flags |= ETH_FLAG_RXHASH; +#endif +#ifdef IXGBE_NO_LRO + if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) +#endif + supported_flags |= ETH_FLAG_LRO; + +#ifdef ETHTOOL_GRXRINGS + switch (adapter->hw.mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_82599EB: + supported_flags |= ETH_FLAG_NTUPLE; + default: + break; + } + +#endif + rc = ethtool_op_set_flags(netdev, data, supported_flags); + if (rc) + return rc; + +#ifndef HAVE_VLAN_RX_REGISTER + if (changed & ETH_FLAG_RXVLAN) + ixgbe_vlan_mode(netdev, netdev->features); + +#endif + /* if state changes we need to update adapter->flags and reset */ + if (!(netdev->features & NETIF_F_LRO)) { + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + need_reset = true; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && + !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { + if (adapter->rx_itr_setting == 1 || + adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + need_reset = true; + } else if (changed & ETH_FLAG_LRO) { +#ifdef IXGBE_NO_LRO + e_info(probe, "rx-usecs set too low, " + "disabling RSC\n"); +#else + e_info(probe, "rx-usecs set too low, " + "falling back to software LRO\n"); +#endif + } + } + +#ifdef ETHTOOL_GRXRINGS + /* + * Check if Flow Director n-tuple support was enabled or disabled. If + * the state changed, we need to reset. + */ + if (!(netdev->features & NETIF_F_NTUPLE)) { + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + /* turn off Flow Director, set ATR and reset */ + if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && + !(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; + need_reset = true; + } + adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) { + /* turn off ATR, enable perfect filters and reset */ + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + need_reset = true; + } + +#endif /* ETHTOOL_GRXRINGS */ + if (need_reset) + ixgbe_do_reset(netdev); + + return 0; +} + +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GRXRINGS +static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + union ixgbe_atr_input *mask = &adapter->fdir_mask; + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct hlist_node *node, *node2; + struct ixgbe_fdir_filter *rule = NULL; + + /* report total rule count */ + cmd->data = (1024 << adapter->fdir_pballoc) - 2; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + if (fsp->location <= rule->sw_idx) + break; + } + + if (!rule || fsp->location != rule->sw_idx) + return -EINVAL; + + /* fill out the flow spec entry */ + + /* set flow type field */ + switch (rule->filter.formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_TCPV4: + fsp->flow_type = TCP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_UDPV4: + fsp->flow_type = UDP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + fsp->flow_type = SCTP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_IPV4: + fsp->flow_type = IP_USER_FLOW; + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fsp->h_u.usr_ip4_spec.proto = 0; + fsp->m_u.usr_ip4_spec.proto = 0; + break; + default: + return -EINVAL; + } + + fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port; + fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port; + fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port; + fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port; + fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0]; + fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0]; + fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id; + fsp->m_ext.vlan_tci = mask->formatted.vlan_id; + fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes; + fsp->m_ext.vlan_etype = mask->formatted.flex_bytes; + fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool); + fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool); + fsp->flow_type |= FLOW_EXT; + + /* record action */ + if (rule->action == IXGBE_FDIR_DROP_QUEUE) + fsp->ring_cookie = RX_CLS_FLOW_DISC; + else + fsp->ring_cookie = rule->action; + + return 0; +} + +static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct hlist_node *node, *node2; + struct ixgbe_fdir_filter *rule; + int cnt = 0; + + /* report total rule count */ + cmd->data = (1024 << adapter->fdir_pballoc) - 2; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = rule->sw_idx; + cnt++; + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + cmd->data = 0; + + /* if RSS is disabled then report no hashing */ + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) + return 0; + + /* Report default options for RSS on ixgbe */ + switch (cmd->flow_type) { + case TCP_V4_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V4_FLOW: + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case IPV4_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V6_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V6_FLOW: + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, +#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS + void *rule_locs) +#else + u32 *rule_locs) +#endif +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = adapter->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = adapter->fdir_filter_count; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, + (u32 *)rule_locs); + break; + case ETHTOOL_GRXFH: + ret = ixgbe_get_rss_hash_opts(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct hlist_node *node, *node2, *parent; + struct ixgbe_fdir_filter *rule; + int err = -EINVAL; + + parent = NULL; + rule = NULL; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + /* hash found, or no matching entry */ + if (rule->sw_idx >= sw_idx) + break; + parent = node; + } + + /* if there is an old rule occupying our place remove it */ + if (rule && (rule->sw_idx == sw_idx)) { + if (!input || (rule->filter.formatted.bkt_hash != + input->filter.formatted.bkt_hash)) { + err = ixgbe_fdir_erase_perfect_filter_82599(hw, + &rule->filter, + sw_idx); + } + + hlist_del(&rule->fdir_node); + kfree(rule); + adapter->fdir_filter_count--; + } + + /* + * If no input this was a delete, err should be 0 if a rule was + * successfully found and removed from the list else -EINVAL + */ + if (!input) + return err; + + /* initialize node and set software index */ + INIT_HLIST_NODE(&input->fdir_node); + + /* add filter to the list */ + if (parent) + hlist_add_after(parent, &input->fdir_node); + else + hlist_add_head(&input->fdir_node, + &adapter->fdir_filter_list); + + /* update counts */ + adapter->fdir_filter_count++; + + return 0; +} + +static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp, + u8 *flow_type) +{ + switch (fsp->flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; + break; + case UDP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; + break; + case SCTP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; + break; + case IP_USER_FLOW: + switch (fsp->h_u.usr_ip4_spec.proto) { + case IPPROTO_TCP: + *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; + break; + case IPPROTO_UDP: + *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; + break; + case IPPROTO_SCTP: + *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; + break; + case 0: + if (!fsp->m_u.usr_ip4_spec.proto) { + *flow_type = IXGBE_ATR_FLOW_TYPE_IPV4; + break; + } + default: + return 0; + } + break; + default: + return 0; + } + + return 1; +} + +static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fdir_filter *input; + union ixgbe_atr_input mask; + int err; + + if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) + return -EOPNOTSUPP; + + /* + * Don't allow programming if the action is a queue greater than + * the number of online Rx queues. + */ + if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) && + (fsp->ring_cookie >= adapter->num_rx_queues)) + return -EINVAL; + + /* Don't allow indexes to exist outside of available space */ + if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) { + e_err(drv, "Location out of range\n"); + return -EINVAL; + } + + input = kzalloc(sizeof(*input), GFP_ATOMIC); + if (!input) + return -ENOMEM; + + memset(&mask, 0, sizeof(union ixgbe_atr_input)); + + /* set SW index */ + input->sw_idx = fsp->location; + + /* record flow type */ + if (!ixgbe_flowspec_to_flow_type(fsp, + &input->filter.formatted.flow_type)) { + e_err(drv, "Unrecognized flow type\n"); + goto err_out; + } + + mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + + if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) + mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; + + /* Copy input into formatted structures */ + input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc; + mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc; + input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst; + mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst; + + if (fsp->flow_type & FLOW_EXT) { + input->filter.formatted.vm_pool = + (unsigned char)ntohl(fsp->h_ext.data[1]); + mask.formatted.vm_pool = + (unsigned char)ntohl(fsp->m_ext.data[1]); + input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci; + mask.formatted.vlan_id = fsp->m_ext.vlan_tci; + input->filter.formatted.flex_bytes = + fsp->h_ext.vlan_etype; + mask.formatted.flex_bytes = fsp->m_ext.vlan_etype; + } + + /* determine if we need to drop or route the packet */ + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) + input->action = IXGBE_FDIR_DROP_QUEUE; + else + input->action = fsp->ring_cookie; + + spin_lock(&adapter->fdir_perfect_lock); + + if (hlist_empty(&adapter->fdir_filter_list)) { + /* save mask and program input mask into HW */ + memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); + err = ixgbe_fdir_set_input_mask_82599(hw, &mask); + if (err) { + e_err(drv, "Error writing mask\n"); + goto err_out_w_lock; + } + } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { + e_err(drv, "Only one mask supported per port\n"); + goto err_out_w_lock; + } + + /* apply mask and compute/store hash */ + ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + + /* program filters to filter memory */ + err = ixgbe_fdir_write_perfect_filter_82599(hw, + &input->filter, input->sw_idx, + (input->action == IXGBE_FDIR_DROP_QUEUE) ? + IXGBE_FDIR_DROP_QUEUE : + adapter->rx_ring[input->action]->reg_idx); + if (err) + goto err_out_w_lock; + + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + + spin_unlock(&adapter->fdir_perfect_lock); + + kfree(input); + return err; +err_out_w_lock: + spin_unlock(&adapter->fdir_perfect_lock); +err_out: + kfree(input); + return -EINVAL; +} + +static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + int err; + + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location)); + spin_unlock(&adapter->fdir_perfect_lock); + + return err; +} + +#ifdef ETHTOOL_SRXNTUPLE +/* + * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid + * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag + * was defined that this function was present. + */ +static int ixgbe_set_rx_ntuple(struct net_device *dev, + struct ethtool_rx_ntuple *cmd) +{ + return -EOPNOTSUPP; +} + +#endif +#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \ + IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) +static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + u32 flags2 = adapter->flags2; + + /* + * RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; + break; + default: + return -EINVAL; + } + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (flags2 != adapter->flags2) { + struct ixgbe_hw *hw = &adapter->hw; + u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); + + if ((flags2 & UDP_RSS_FLAGS) && + !(adapter->flags2 & UDP_RSS_FLAGS)) + e_warn(drv, "enabling UDP RSS: fragmented packets" + " may arrive out of order to the stack above\n"); + + adapter->flags2 = flags2; + + /* Perform hash on these packet types */ + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4 + | IXGBE_MRQC_RSS_FIELD_IPV4_TCP + | IXGBE_MRQC_RSS_FIELD_IPV6 + | IXGBE_MRQC_RSS_FIELD_IPV6_TCP; + + mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP); + + if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; + + if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; + + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + } + + return 0; +} + +static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_SRXFH: + ret = ixgbe_set_rss_hash_opt(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +#endif /* ETHTOOL_GRXRINGS */ +//static +struct ethtool_ops ixgbe_ethtool_ops = { + .get_settings = ixgbe_get_settings, + .set_settings = ixgbe_set_settings, + .get_drvinfo = ixgbe_get_drvinfo, + .get_regs_len = ixgbe_get_regs_len, + .get_regs = ixgbe_get_regs, + .get_wol = ixgbe_get_wol, + .set_wol = ixgbe_set_wol, + .nway_reset = ixgbe_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = ixgbe_get_eeprom_len, + .get_eeprom = ixgbe_get_eeprom, + .set_eeprom = ixgbe_set_eeprom, + .get_ringparam = ixgbe_get_ringparam, + .set_ringparam = ixgbe_set_ringparam, + .get_pauseparam = ixgbe_get_pauseparam, + .set_pauseparam = ixgbe_set_pauseparam, + .get_msglevel = ixgbe_get_msglevel, + .set_msglevel = ixgbe_set_msglevel, +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT + .self_test_count = ixgbe_diag_test_count, +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .self_test = ixgbe_diag_test, + .get_strings = ixgbe_get_strings, +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + .set_phys_id = ixgbe_set_phys_id, +#else + .phys_id = ixgbe_phys_id, +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT + .get_stats_count = ixgbe_get_stats_count, +#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .get_sset_count = ixgbe_get_sset_count, +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .get_ethtool_stats = ixgbe_get_ethtool_stats, +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif + .get_coalesce = ixgbe_get_coalesce, + .set_coalesce = ixgbe_set_coalesce, +#ifndef HAVE_NDO_SET_FEATURES + .get_rx_csum = ixgbe_get_rx_csum, + .set_rx_csum = ixgbe_set_rx_csum, + .get_tx_csum = ixgbe_get_tx_csum, + .set_tx_csum = ixgbe_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, + .set_tso = ixgbe_set_tso, +#endif +#ifdef ETHTOOL_GFLAGS + .get_flags = ethtool_op_get_flags, + .set_flags = ixgbe_set_flags, +#endif +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GRXRINGS + .get_rxnfc = ixgbe_get_rxnfc, + .set_rxnfc = ixgbe_set_rxnfc, +#ifdef ETHTOOL_SRXNTUPLE + .set_rx_ntuple = ixgbe_set_rx_ntuple, +#endif +#endif +}; + +void ixgbe_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops); +} +#endif /* SIOCETHTOOL */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h new file mode 100644 index 00000000..cad28622 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h @@ -0,0 +1,91 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_FCOE_H +#define _IXGBE_FCOE_H + +#ifdef IXGBE_FCOE + +#include +#include + +/* shift bits within STAT fo FCSTAT */ +#define IXGBE_RXDADV_FCSTAT_SHIFT 4 + +/* ddp user buffer */ +#define IXGBE_BUFFCNT_MAX 256 /* 8 bits bufcnt */ +#define IXGBE_FCPTR_ALIGN 16 +#define IXGBE_FCPTR_MAX (IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t)) +#define IXGBE_FCBUFF_4KB 0x0 +#define IXGBE_FCBUFF_8KB 0x1 +#define IXGBE_FCBUFF_16KB 0x2 +#define IXGBE_FCBUFF_64KB 0x3 +#define IXGBE_FCBUFF_MAX 65536 /* 64KB max */ +#define IXGBE_FCBUFF_MIN 4096 /* 4KB min */ +#define IXGBE_FCOE_DDP_MAX 512 /* 9 bits xid */ + +/* Default traffic class to use for FCoE */ +#define IXGBE_FCOE_DEFTC 3 + +/* fcerr */ +#define IXGBE_FCERR_BADCRC 0x00100000 +#define IXGBE_FCERR_EOFSOF 0x00200000 +#define IXGBE_FCERR_NOFIRST 0x00300000 +#define IXGBE_FCERR_OOOSEQ 0x00400000 +#define IXGBE_FCERR_NODMA 0x00500000 +#define IXGBE_FCERR_PKTLOST 0x00600000 + +/* FCoE DDP for target mode */ +#define __IXGBE_FCOE_TARGET 1 + +struct ixgbe_fcoe_ddp { + int len; + u32 err; + unsigned int sgc; + struct scatterlist *sgl; + dma_addr_t udp; + u64 *udl; + struct pci_pool *pool; +}; + +struct ixgbe_fcoe { + struct pci_pool **pool; + atomic_t refcnt; + spinlock_t lock; + struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX]; + unsigned char *extra_ddp_buffer; + dma_addr_t extra_ddp_buffer_dma; + u64 __percpu *pcpu_noddp; + u64 __percpu *pcpu_noddp_ext_buff; + unsigned long mode; + u8 tc; + u8 up; + u8 up_set; +}; +#endif /* IXGBE_FCOE */ + +#endif /* _IXGBE_FCOE_H */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c new file mode 100644 index 00000000..8c1d2fe3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -0,0 +1,2970 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/****************************************************************************** + Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code +******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SCTP +#include +#endif +#include +#include +#ifdef NETIF_F_TSO +#include +#ifdef NETIF_F_TSO6 +#include +#endif +#endif +#ifdef SIOCETHTOOL +#include +#endif + +#include "ixgbe.h" + +#undef CONFIG_DCA +#undef CONFIG_DCA_MODULE + +char ixgbe_driver_name[] = "ixgbe"; +static const char ixgbe_driver_string[] = + "Intel(R) 10 Gigabit PCI Express Network Driver"; +#define DRV_HW_PERF + +#ifndef CONFIG_IXGBE_NAPI +#define DRIVERNAPI +#else +#define DRIVERNAPI "-NAPI" +#endif + +#define FPGA + +#define VMDQ_TAG + +#define MAJ 3 +#define MIN 9 +#define BUILD 17 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ + __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG +const char ixgbe_driver_version[] = DRV_VERSION; +static const char ixgbe_copyright[] = + "Copyright (c) 1999-2012 Intel Corporation."; + +/* ixgbe_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = { + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)}, + /* required last entry */ + {0, } +}; + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +static int ixgbe_notify_dca(struct notifier_block *, unsigned long event, + void *p); +static struct notifier_block dca_notifier = { + .notifier_call = ixgbe_notify_dca, + .next = NULL, + .priority = 0 +}; + +#endif +MODULE_AUTHOR("Intel Corporation, "); +MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +#define DEFAULT_DEBUG_LEVEL_SHIFT 3 + + +static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter) +{ + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, + ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD); +} + +#ifdef NO_VNIC +static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter) +{ + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, + ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD); +} +#endif + + +static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + int i; + u32 data; + + if ((hw->fc.current_mode != ixgbe_fc_full) && + (hw->fc.current_mode != ixgbe_fc_rx_pause)) + return; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + break; + default: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } + hwstats->lxoffrxc += data; + + /* refill credits (no tx hang) if we received xoff */ + if (!data) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) + clear_bit(__IXGBE_HANG_CHECK_ARMED, + &adapter->tx_ring[i]->state); +} + +static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + u32 xoff[8] = {0}; + int i; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + +#ifdef HAVE_DCBNL_IEEE + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + +#endif + if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) { + ixgbe_update_xoff_rx_lfc(adapter); + return; + } + + /* update stats for each tc, only valid with PFC enabled */ + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); + break; + default: + xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); + } + hwstats->pxoffrxc[i] += xoff[i]; + } + + /* disarm tx queues that have received xoff frames */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; + u8 tc = tx_ring->dcb_tc; + + if ((tc <= 7) && (xoff[tc])) + clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); + } +} + + + + +#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 + + + + +#ifdef HAVE_8021P_SUPPORT +/** + * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping + * @adapter: driver data + */ +void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl; + int i; + + /* leave vlan tag stripping enabled for DCB */ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + return; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl &= ~IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + for (i = 0; i < adapter->num_rx_queues; i++) { + u8 reg_idx = adapter->rx_ring[i]->reg_idx; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); + vlnctrl &= ~IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); + } + break; + default: + break; + } +} + +#endif +/** + * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping + * @adapter: driver data + */ +void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl; + int i; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + for (i = 0; i < adapter->num_rx_queues; i++) { + u8 reg_idx = adapter->rx_ring[i]->reg_idx; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); + vlnctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); + } + break; + default: + break; + } +} + +#ifdef HAVE_VLAN_RX_REGISTER +void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp) +#else +void ixgbe_vlan_mode(struct net_device *netdev, u32 features) +#endif +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_8021P_SUPPORT + bool enable; +#endif +#ifdef HAVE_VLAN_RX_REGISTER + + //if (!test_bit(__IXGBE_DOWN, &adapter->state)) + // ixgbe_irq_disable(adapter); + + adapter->vlgrp = grp; + + //if (!test_bit(__IXGBE_DOWN, &adapter->state)) + // ixgbe_irq_enable(adapter, true, true); +#endif +#ifdef HAVE_8021P_SUPPORT +#ifdef HAVE_VLAN_RX_REGISTER + enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED)); +#else + enable = !!(features & NETIF_F_HW_VLAN_RX); +#endif + if (enable) + /* enable VLAN tag insert/strip */ + ixgbe_vlan_stripping_enable(adapter); + else + /* disable VLAN tag insert/strip */ + ixgbe_vlan_stripping_disable(adapter); + +#endif +} + +static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq) +{ +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *mc_ptr; +#else + struct dev_mc_list *mc_ptr; +#endif + struct ixgbe_adapter *adapter = hw->back; + u8 *addr = *mc_addr_ptr; + + *vmdq = adapter->num_vfs; + +#ifdef NETDEV_HW_ADDR_T_MULTICAST + mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]); + if (mc_ptr->list.next) { + struct netdev_hw_addr *ha; + + ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list); + *mc_addr_ptr = ha->addr; + } +#else + mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]); + if (mc_ptr->next) + *mc_addr_ptr = mc_ptr->next->dmi_addr; +#endif + else + *mc_addr_ptr = NULL; + + return addr; +} + +/** + * ixgbe_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +int ixgbe_write_mc_addr_list(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *ha; +#endif + u8 *addr_list = NULL; + int addr_count = 0; + + if (!hw->mac.ops.update_mc_addr_list) + return -ENOMEM; + + if (!netif_running(netdev)) + return 0; + + + hw->mac.ops.update_mc_addr_list(hw, NULL, 0, + ixgbe_addr_list_itr, true); + + if (!netdev_mc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_MULTICAST + ha = list_first_entry(&netdev->mc.list, + struct netdev_hw_addr, list); + addr_list = ha->addr; +#else + addr_list = netdev->mc_list->dmi_addr; +#endif + addr_count = netdev_mc_count(netdev); + + hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count, + ixgbe_addr_list_itr, false); + } + +#ifdef CONFIG_PCI_IOV + //ixgbe_restore_vf_multicasts(adapter); +#endif + return addr_count; +} + + +void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) { + hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + } else { + hw->mac.ops.clear_rar(hw, i); + } + } +} + +void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { + if (adapter->mac_table[i].state & + IXGBE_MAC_STATE_IN_USE) { + hw->mac.ops.set_rar(hw, i, + adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + } else { + hw->mac.ops.clear_rar(hw, i); + } + adapter->mac_table[i].state &= + ~(IXGBE_MAC_STATE_MODIFIED); + } + } +} + +int ixgbe_available_rars(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE); + memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); + adapter->mac_table[i].queue = queue; + ixgbe_sync_mac_table(adapter); + return i; + } + return -ENOMEM; +} + +void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) +{ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + } + ixgbe_sync_mac_table(adapter); +} + +void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index) +{ + adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[index].addr, 0, ETH_ALEN); + adapter->mac_table[index].queue = 0; + ixgbe_sync_mac_table(adapter); +} + +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return 0; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + ixgbe_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} +#ifdef HAVE_SET_RX_MODE +/** + * ixgbe_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, + struct net_device *netdev, unsigned int vfn) +{ + int count = 0; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + return -ENOMEM; + + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn); + ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn); +#else + ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn); + ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn); +#endif + count++; + } + } + return count; +} + +#endif +/** + * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_method entry point is called whenever the unicast/multicast + * address list or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast and + * promiscuous mode. + **/ +void ixgbe_set_rx_mode(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; + u32 vlnctrl; + int count; + + /* Check for Promiscuous and All Multicast modes */ + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + + /* set all bits that we expect to always be set */ + fctrl |= IXGBE_FCTRL_BAM; + fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ + fctrl |= IXGBE_FCTRL_PMCF; + + /* clear the bits we are changing the status of */ + fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + + if (netdev->flags & IFF_PROMISC) { + hw->addr_ctrl.user_set_promisc = true; + fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + vmolr |= IXGBE_VMOLR_MPE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = ixgbe_write_mc_addr_list(netdev); + if (count < 0) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else if (count) { + vmolr |= IXGBE_VMOLR_ROMPE; + } + } +#ifdef NETIF_F_HW_VLAN_TX + /* enable hardware vlan filtering */ + vlnctrl |= IXGBE_VLNCTRL_VFE; +#endif + hw->addr_ctrl.user_set_promisc = false; +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = ixgbe_write_uc_addr_list(adapter, netdev, + adapter->num_vfs); + if (count < 0) { + fctrl |= IXGBE_FCTRL_UPE; + vmolr |= IXGBE_VMOLR_ROPE; + } +#endif + } + + if (hw->mac.type != ixgbe_mac_82598EB) { + vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) & + ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE | + IXGBE_VMOLR_ROPE); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); + } + + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); +} + + + + + + + + +/* Additional bittime to account for IXGBE framing */ +#define IXGBE_ETH_FRAMING 20 + +/* + * ixgbe_hpbthresh - calculate high water mark for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int link, tc, kb, marker; + u32 dv_id, rx_pba; + + /* Calculate max LAN frame size */ + tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; + + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); + + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } +#endif + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_DV_X540(link, tc); + break; + default: + dv_id = IXGBE_DV(link, tc); + break; + } + + /* Loopback switch introduces additional latency */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + dv_id += IXGBE_B2BT(tc); + + /* Delay value is calculated in bit times convert to KB */ + kb = IXGBE_BT2KB(dv_id); + rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10; + + marker = rx_pba - kb; + + /* It is possible that the packet buffer is not large enough + * to provide required headroom. In this case throw an error + * to user and a do the best we can. + */ + if (marker < 0) { + e_warn(drv, "Packet Buffer(%i) can not provide enough" + "headroom to suppport flow control." + "Decrease MTU or number of traffic classes\n", pb); + marker = tc + 1; + } + + return marker; +} + +/* + * ixgbe_lpbthresh - calculate low water mark for for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int tc; + u32 dv_id; + + /* Calculate max LAN frame size */ + tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; + + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); + + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } +#endif + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_LOW_DV_X540(tc); + break; + default: + dv_id = IXGBE_LOW_DV(tc); + break; + } + + /* Delay value is calculated in bit times convert to KB */ + return IXGBE_BT2KB(dv_id); +} + +/* + * ixgbe_pbthresh_setup - calculate and setup high low water marks + */ +static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int num_tc = netdev_get_num_tc(adapter->netdev); + int i; + + if (!num_tc) + num_tc = 1; + if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS) + num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS; + + for (i = 0; i < num_tc; i++) { + hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i); + + /* Low water marks must not be larger than high water marks */ + if (hw->fc.low_water[i] > hw->fc.high_water[i]) + hw->fc.low_water[i] = 0; + } + + for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) + hw->fc.high_water[i] = 0; +} + + + +#ifdef NO_VNIC +static void ixgbe_configure(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + ixgbe_configure_pb(adapter); + ixgbe_configure_dcb(adapter); + + ixgbe_set_rx_mode(adapter->netdev); +#ifdef NETIF_F_HW_VLAN_TX + ixgbe_restore_vlan(adapter); +#endif + +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) + ixgbe_configure_fcoe(adapter); + +#endif /* IXGBE_FCOE */ + + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + hw->mac.ops.disable_sec_rx_path(hw); + + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + ixgbe_init_fdir_signature_82599(&adapter->hw, + adapter->fdir_pballoc); + } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + ixgbe_init_fdir_perfect_82599(&adapter->hw, + adapter->fdir_pballoc); + ixgbe_fdir_filter_restore(adapter); + } + + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + hw->mac.ops.enable_sec_rx_path(hw); + + ixgbe_configure_virtualization(adapter); + + ixgbe_configure_tx(adapter); + ixgbe_configure_rx(adapter); +} +#endif + +static bool ixgbe_is_sfp(struct ixgbe_hw *hw) +{ + switch (hw->phy.type) { + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + case ixgbe_phy_sfp_active_unknown: + case ixgbe_phy_sfp_ftl_active: + return true; + case ixgbe_phy_nl: + if (hw->mac.type == ixgbe_mac_82598EB) + return true; + default: + return false; + } +} + + +/** + * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset + * @adapter: board private structure + * + * On a reset we need to clear out the VF stats or accounting gets + * messed up because they're not clear on read. + **/ +void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < adapter->num_vfs; i++) { + adapter->vfinfo[i].last_vfstats.gprc = + IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gprc += + adapter->vfinfo[i].vfstats.gprc; + adapter->vfinfo[i].vfstats.gprc = 0; + adapter->vfinfo[i].last_vfstats.gptc = + IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gptc += + adapter->vfinfo[i].vfstats.gptc; + adapter->vfinfo[i].vfstats.gptc = 0; + adapter->vfinfo[i].last_vfstats.gorc = + IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gorc += + adapter->vfinfo[i].vfstats.gorc; + adapter->vfinfo[i].vfstats.gorc = 0; + adapter->vfinfo[i].last_vfstats.gotc = + IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gotc += + adapter->vfinfo[i].vfstats.gotc; + adapter->vfinfo[i].vfstats.gotc = 0; + adapter->vfinfo[i].last_vfstats.mprc = + IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.mprc += + adapter->vfinfo[i].vfstats.mprc; + adapter->vfinfo[i].vfstats.mprc = 0; + } +} + + + +void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) +{ +#ifdef NO_VNIC + WARN_ON(in_interrupt()); + /* put off any impending NetWatchDogTimeout */ + adapter->netdev->trans_start = jiffies; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + ixgbe_down(adapter); + /* + * If SR-IOV enabled then wait a bit before bringing the adapter + * back up to give the VFs time to respond to the reset. The + * two second wait is based upon the watchdog timer cycle in + * the VF driver. + */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + msleep(2000); + ixgbe_up(adapter); + clear_bit(__IXGBE_RESETTING, &adapter->state); +#endif +} + +void ixgbe_up(struct ixgbe_adapter *adapter) +{ + /* hardware has been reset, we need to reload some things */ + //ixgbe_configure(adapter); + + //ixgbe_up_complete(adapter); +} + +void ixgbe_reset(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int err; + + /* lock SFP init bit to prevent race conditions with the watchdog */ + while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) + usleep_range(1000, 2000); + + /* clear all SFP and link config related flags while holding SFP_INIT */ + adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP | + IXGBE_FLAG2_SFP_NEEDS_RESET); + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; + + err = hw->mac.ops.init_hw(hw); + switch (err) { + case 0: + case IXGBE_ERR_SFP_NOT_PRESENT: + case IXGBE_ERR_SFP_NOT_SUPPORTED: + break; + case IXGBE_ERR_MASTER_REQUESTS_PENDING: + e_dev_err("master disable timed out\n"); + break; + case IXGBE_ERR_EEPROM_VERSION: + /* We are running on a pre-production device, log a warning */ + e_dev_warn("This device is a pre-production adapter/LOM. " + "Please be aware there may be issues associated " + "with your hardware. If you are experiencing " + "problems please contact your Intel or hardware " + "representative who provided you with this " + "hardware.\n"); + break; + default: + e_dev_err("Hardware Error: %d\n", err); + } + + clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); + + ixgbe_flush_sw_mac_table(adapter); + memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, + netdev->addr_len); + adapter->mac_table[0].queue = adapter->num_vfs; + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); +} + + + + + + +void ixgbe_down(struct ixgbe_adapter *adapter) +{ +#ifdef NO_VNIC + struct net_device *netdev = adapter->netdev; + struct ixgbe_hw *hw = &adapter->hw; + u32 rxctrl; + int i; + + /* signal that we are down to the interrupt handler */ + set_bit(__IXGBE_DOWN, &adapter->state); + + /* disable receives */ + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); + + /* disable all enabled rx queues */ + for (i = 0; i < adapter->num_rx_queues; i++) + /* this call also flushes the previous write */ + ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]); + + usleep_range(10000, 20000); + + netif_tx_stop_all_queues(netdev); + + /* call carrier off first to avoid false dev_watchdog timeouts */ + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + ixgbe_irq_disable(adapter); + + ixgbe_napi_disable_all(adapter); + + adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | + IXGBE_FLAG2_RESET_REQUESTED); + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; + + del_timer_sync(&adapter->service_timer); + + if (adapter->num_vfs) { + /* Clear EITR Select mapping */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0); + + /* Mark all the VFs as inactive */ + for (i = 0 ; i < adapter->num_vfs; i++) + adapter->vfinfo[i].clear_to_send = 0; + + /* ping all the active vfs to let them know we are going down */ + ixgbe_ping_all_vfs(adapter); + + /* Disable all VFTE/VFRE TX/RX */ + ixgbe_disable_tx_rx(adapter); + } + + /* disable transmits in the hardware now that interrupts are off */ + for (i = 0; i < adapter->num_tx_queues; i++) { + u8 reg_idx = adapter->tx_ring[i]->reg_idx; + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH); + } + + /* Disable the Tx DMA engine on 82599 and X540 */ + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, + (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) & + ~IXGBE_DMATXCTL_TE)); + break; + default: + break; + } + +#ifdef HAVE_PCI_ERS + if (!pci_channel_offline(adapter->pdev)) +#endif + ixgbe_reset(adapter); + /* power down the optics */ + if ((hw->phy.multispeed_fiber) || + ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && + (hw->mac.type == ixgbe_mac_82599EB))) + ixgbe_disable_tx_laser(hw); + + ixgbe_clean_all_tx_rings(adapter); + ixgbe_clean_all_rx_rings(adapter); + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + /* since we reset the hardware DCA settings were cleared */ + ixgbe_setup_dca(adapter); +#endif + +#endif /* NO_VNIC */ +} + +#ifndef NO_VNIC + +#undef IXGBE_FCOE + +/* Artificial max queue cap per traffic class in DCB mode */ +#define DCB_QUEUE_CAP 8 + +/** + * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device + * @adapter: board private structure to initialize + * + * When DCB (Data Center Bridging) is enabled, allocate queues for + * each traffic class. If multiqueue isn't available,then abort DCB + * initialization. + * + * This function handles all combinations of DCB, RSS, and FCoE. + * + **/ +static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) +{ + int tcs; +#ifdef HAVE_MQPRIO + int rss_i, i, offset = 0; + struct net_device *dev = adapter->netdev; + + /* Map queue offset and counts onto allocated tx queues */ + tcs = netdev_get_num_tc(dev); + + if (!tcs) + return false; + + rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus()); + + if (rss_i > DCB_QUEUE_CAP) + rss_i = DCB_QUEUE_CAP; + + for (i = 0; i < tcs; i++) { + netdev_set_tc_queue(dev, i, rss_i, offset); + offset += rss_i; + } + + adapter->num_tx_queues = rss_i * tcs; + adapter->num_rx_queues = rss_i * tcs; + +#ifdef IXGBE_FCOE + /* FCoE enabled queues require special configuration indexed + * by feature specific indices and mask. Here we map FCoE + * indices onto the DCB queue pairs allowing FCoE to own + * configuration later. + */ + + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + struct ixgbe_ring_feature *f; + int tc; + u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0}; + + ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg, + IXGBE_DCB_TX_CONFIG, + prio_tc); + tc = prio_tc[adapter->fcoe.up]; + + f = &adapter->ring_feature[RING_F_FCOE]; + f->indices = min_t(int, rss_i, f->indices); + f->mask = rss_i * tc; + } +#endif /* IXGBE_FCOE */ +#else + if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + return false; + + /* Enable one Queue per traffic class */ + tcs = adapter->tc; + if (!tcs) + return false; + +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + struct ixgbe_ring_feature *f; + int tc = netdev_get_prio_tc_map(adapter->netdev, + adapter->fcoe.up); + + f = &adapter->ring_feature[RING_F_FCOE]; + + /* + * We have max 8 queues for FCoE, where 8 the is + * FCoE redirection table size. We must also share + * ring resources with network traffic so if FCoE TC is + * 4 or greater and we are in 8 TC mode we can only use + * 7 queues. + */ + if ((tcs > 4) && (tc >= 4) && (f->indices > 7)) + f->indices = 7; + + f->indices = min_t(int, num_online_cpus(), f->indices); + f->mask = tcs; + + adapter->num_rx_queues = f->indices + tcs; + adapter->num_tx_queues = f->indices + tcs; + + return true; + } + +#endif /* IXGBE_FCOE */ + adapter->num_rx_queues = tcs; + adapter->num_tx_queues = tcs; +#endif /* HAVE_MQ */ + + return true; +} + +/** + * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices + * @adapter: board private structure to initialize + * + * When VMDq (Virtual Machine Devices queue) is enabled, allocate queues + * and VM pools where appropriate. If RSS is available, then also try and + * enable RSS and map accordingly. + * + **/ +static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter) +{ + int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices; + int vmdq_m = 0; + int rss_i = adapter->ring_feature[RING_F_RSS].indices; + unsigned long i; + int rss_shift; + bool ret = false; + + + switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED + | IXGBE_FLAG_DCB_ENABLED + | IXGBE_FLAG_VMDQ_ENABLED)) { + + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED): + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i); + if (vmdq_i > 32) + rss_i = 2; + else + rss_i = 4; + i = rss_i; + rss_shift = find_first_bit(&i, sizeof(i) * 8); + vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) << + rss_shift) & (MAX_RX_QUEUES - 1); + break; + default: + break; + } + adapter->num_rx_queues = vmdq_i * rss_i; + adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i); + ret = true; + break; + + case (IXGBE_FLAG_VMDQ_ENABLED): + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1; + break; + default: + break; + } + adapter->num_rx_queues = vmdq_i; + adapter->num_tx_queues = vmdq_i; + ret = true; + break; + + default: + ret = false; + goto vmdq_queues_out; + } + + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + adapter->num_rx_pools = vmdq_i; + adapter->num_rx_queues_per_pool = adapter->num_rx_queues / + vmdq_i; + } else { + adapter->num_rx_pools = adapter->num_rx_queues; + adapter->num_rx_queues_per_pool = 1; + } + /* save the mask for later use */ + adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m; +vmdq_queues_out: + return ret; +} + +/** + * ixgbe_set_rss_queues: Allocate queues for RSS + * @adapter: board private structure to initialize + * + * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try + * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. + * + **/ +static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ring_feature *f; + + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) { + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + return false; + } + + /* set mask for 16 queue limit of RSS */ + f = &adapter->ring_feature[RING_F_RSS]; + f->mask = 0xF; + + /* + * Use Flow Director in addition to RSS to ensure the best + * distribution of flows across cores, even when an FDIR flow + * isn't matched. + */ + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + f = &adapter->ring_feature[RING_F_FDIR]; + + f->indices = min_t(int, num_online_cpus(), f->indices); + f->mask = 0; + } + + adapter->num_rx_queues = f->indices; +#ifdef HAVE_TX_MQ + adapter->num_tx_queues = f->indices; +#endif + + return true; +} + +#ifdef IXGBE_FCOE +/** + * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE) + * @adapter: board private structure to initialize + * + * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges. + * The ring feature mask is not used as a mask for FCoE, as it can take any 8 + * rx queues out of the max number of rx queues, instead, it is used as the + * index of the first rx queue used by FCoE. + * + **/ +static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ring_feature *f; + + if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) + return false; + + ixgbe_set_rss_queues(adapter); + + f = &adapter->ring_feature[RING_F_FCOE]; + f->indices = min_t(int, num_online_cpus(), f->indices); + + /* adding FCoE queues */ + f->mask = adapter->num_rx_queues; + adapter->num_rx_queues += f->indices; + adapter->num_tx_queues += f->indices; + + return true; +} + +#endif /* IXGBE_FCOE */ +/* + * ixgbe_set_num_queues: Allocate queues for device, feature dependent + * @adapter: board private structure to initialize + * + * This is the top level queue allocation routine. The order here is very + * important, starting with the "most" number of features turned on at once, + * and ending with the smallest set of features. This way large combinations + * can be allocated if they're turned on, and smaller combinations are the + * fallthrough conditions. + * + **/ +static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) +{ + /* Start with base case */ + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_rx_pools = adapter->num_rx_queues; + adapter->num_rx_queues_per_pool = 1; + + if (ixgbe_set_vmdq_queues(adapter)) + return; + + if (ixgbe_set_dcb_queues(adapter)) + return; + +#ifdef IXGBE_FCOE + if (ixgbe_set_fcoe_queues(adapter)) + return; + +#endif /* IXGBE_FCOE */ + ixgbe_set_rss_queues(adapter); +} + +#endif + + +/** + * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter) + * @adapter: board private structure to initialize + * + * ixgbe_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + **/ +static int ixgbe_sw_init(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + int err; + + /* PCI config space info */ + + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + err = ixgbe_init_shared_code(hw); + if (err) { + e_err(probe, "init_shared_code failed: %d\n", err); + goto out; + } + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * + hw->mac.num_rar_entries, + GFP_ATOMIC); + /* Set capability flags */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | + IXGBE_FLAG_MSIX_CAPABLE | + IXGBE_FLAG_MQ_CAPABLE | + IXGBE_FLAG_RSS_CAPABLE; + adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif + adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE; + + if (hw->device_id == IXGBE_DEV_ID_82598AT) + adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; + + adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598; + break; + case ixgbe_mac_X540: + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + case ixgbe_mac_82599EB: + adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | + IXGBE_FLAG_MSIX_CAPABLE | + IXGBE_FLAG_MQ_CAPABLE | + IXGBE_FLAG_RSS_CAPABLE; + adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif + adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; +#ifdef IXGBE_FCOE + adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; + adapter->ring_feature[RING_F_FCOE].indices = 0; +#ifdef CONFIG_DCB + /* Default traffic class to use for FCoE */ + adapter->fcoe.tc = IXGBE_FCOE_DEFTC; + adapter->fcoe.up = IXGBE_FCOE_DEFTC; + adapter->fcoe.up_set = IXGBE_FCOE_DEFTC; +#endif +#endif + if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; +#ifndef IXGBE_NO_SMART_SPEED + hw->phy.smart_speed = ixgbe_smart_speed_on; +#else + hw->phy.smart_speed = ixgbe_smart_speed_off; +#endif + adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599; + default: + break; + } + + /* n-tuple support exists, always init our spinlock */ + //spin_lock_init(&adapter->fdir_perfect_lock); + + if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) { + int j; + struct ixgbe_dcb_tc_config *tc; + int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS; + + + adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i; + adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i; + for (j = 0; j < dcb_i; j++) { + tc = &adapter->dcb_cfg.tc_config[j]; + tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0; + tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i; + tc->pfc = ixgbe_dcb_pfc_disabled; + if (j == 0) { + /* total of all TCs bandwidth needs to be 100 */ + tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent += + 100 % dcb_i; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent += + 100 % dcb_i; + } + } + + /* Initialize default user to priority mapping, UPx->TC0 */ + tc = &adapter->dcb_cfg.tc_config[0]; + tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF; + tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF; + + adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] = 100; + adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100; + adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal; + adapter->dcb_cfg.pfc_mode_enable = false; + adapter->dcb_cfg.round_robin_enable = false; + adapter->dcb_set_bitmap = 0x00; +#ifdef CONFIG_DCB + adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE; +#endif /* CONFIG_DCB */ + + if (hw->mac.type == ixgbe_mac_X540) { + adapter->dcb_cfg.num_tcs.pg_tcs = 4; + adapter->dcb_cfg.num_tcs.pfc_tcs = 4; + } + } +#ifdef CONFIG_DCB + /* XXX does this need to be initialized even w/o DCB? */ + //memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, + // sizeof(adapter->temp_dcb_cfg)); + +#endif + //if (hw->mac.type == ixgbe_mac_82599EB || + // hw->mac.type == ixgbe_mac_X540) + // hw->mbx.ops.init_params(hw); + + /* default flow control settings */ + hw->fc.requested_mode = ixgbe_fc_full; + hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */ + + adapter->last_lfc_mode = hw->fc.current_mode; + ixgbe_pbthresh_setup(adapter); + hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; + hw->fc.send_xon = true; + hw->fc.disable_fc_autoneg = false; + + /* set default ring sizes */ + adapter->tx_ring_count = IXGBE_DEFAULT_TXD; + adapter->rx_ring_count = IXGBE_DEFAULT_RXD; + + /* set default work limits */ + adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK; + adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK; + + set_bit(__IXGBE_DOWN, &adapter->state); +out: + return err; +} + +/** + * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + **/ +int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + //int orig_node = dev_to_node(dev); + int numa_node = -1; + int size; + + size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; + + if (tx_ring->q_vector) + numa_node = tx_ring->q_vector->numa_node; + + tx_ring->tx_buffer_info = vzalloc_node(size, numa_node); + if (!tx_ring->tx_buffer_info) + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + //set_dev_node(dev, numa_node); + //tx_ring->desc = dma_alloc_coherent(dev, + // tx_ring->size, + // &tx_ring->dma, + // GFP_KERNEL); + //set_dev_node(dev, orig_node); + //if (!tx_ring->desc) + // tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + // &tx_ring->dma, GFP_KERNEL); + //if (!tx_ring->desc) + // goto err; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); + return -ENOMEM; +} + +/** + * ixgbe_setup_all_tx_resources - allocate all queues Tx resources + * @adapter: board private structure + * + * If this function returns with an error, then it's possible one or + * more of the rings is populated (while the rest are not). It is the + * callers duty to clean those orphaned rings. + * + * Return 0 on success, negative on failure + **/ +static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) +{ + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = ixgbe_setup_tx_resources(adapter->tx_ring[i]); + if (!err) + continue; + e_err(probe, "Allocation for Tx Queue %u failed\n", i); + break; + } + + return err; +} + +/** + * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + //int orig_node = dev_to_node(dev); + int numa_node = -1; + int size; + + size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; + + if (rx_ring->q_vector) + numa_node = rx_ring->q_vector->numa_node; + + rx_ring->rx_buffer_info = vzalloc_node(size, numa_node); + if (!rx_ring->rx_buffer_info) + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->size = ALIGN(rx_ring->size, 4096); + +#ifdef NO_VNIC + set_dev_node(dev, numa_node); + rx_ring->desc = dma_alloc_coherent(dev, + rx_ring->size, + &rx_ring->dma, + GFP_KERNEL); + set_dev_node(dev, orig_node); + if (!rx_ring->desc) + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + if (!rx_ring->desc) + goto err; + +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + ixgbe_init_rx_page_offset(rx_ring); + +#endif + +#endif /* NO_VNIC */ + return 0; +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); + return -ENOMEM; +} + +/** + * ixgbe_setup_all_rx_resources - allocate all queues Rx resources + * @adapter: board private structure + * + * If this function returns with an error, then it's possible one or + * more of the rings is populated (while the rest are not). It is the + * callers duty to clean those orphaned rings. + * + * Return 0 on success, negative on failure + **/ +static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) +{ + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = ixgbe_setup_rx_resources(adapter->rx_ring[i]); + if (!err) + continue; + e_err(probe, "Allocation for Rx Queue %u failed\n", i); + break; + } + + return err; +} + +/** + * ixgbe_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + **/ +void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring) +{ + //ixgbe_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + //dma_free_coherent(tx_ring->dev, tx_ring->size, + // tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + **/ +static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + if (adapter->tx_ring[i]->desc) + ixgbe_free_tx_resources(adapter->tx_ring[i]); +} + +/** + * ixgbe_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + **/ +void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) +{ + //ixgbe_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + //dma_free_coherent(rx_ring->dev, rx_ring->size, + // rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + **/ +static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->rx_ring[i]->desc) + ixgbe_free_rx_resources(adapter->rx_ring[i]); +} + + +/** + * ixgbe_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + **/ +//static +int ixgbe_open(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int err; + + /* disallow open during test */ + if (test_bit(__IXGBE_TESTING, &adapter->state)) + return -EBUSY; + + netif_carrier_off(netdev); + + /* allocate transmit descriptors */ + err = ixgbe_setup_all_tx_resources(adapter); + if (err) + goto err_setup_tx; + + /* allocate receive descriptors */ + err = ixgbe_setup_all_rx_resources(adapter); + if (err) + goto err_setup_rx; + +#ifdef NO_VNIC + ixgbe_configure(adapter); + + err = ixgbe_request_irq(adapter); + if (err) + goto err_req_irq; + + ixgbe_up_complete(adapter); + +err_req_irq: +#else + return 0; +#endif +err_setup_rx: + ixgbe_free_all_rx_resources(adapter); +err_setup_tx: + ixgbe_free_all_tx_resources(adapter); + ixgbe_reset(adapter); + + return err; +} + +/** + * ixgbe_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + **/ +//static +int ixgbe_close(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + //ixgbe_down(adapter); + //ixgbe_free_irq(adapter); + + //ixgbe_fdir_filter_exit(adapter); + + //ixgbe_free_all_tx_resources(adapter); + //ixgbe_free_all_rx_resources(adapter); + + ixgbe_release_hw_control(adapter); + + return 0; +} + + + + + +/** + * ixgbe_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are actually updated from the timer callback. + **/ +//static +struct net_device_stats *ixgbe_get_stats(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + /* update the stats data */ + ixgbe_update_stats(adapter); + +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + /* only return the current stats */ + return &netdev->stats; +#else + /* only return the current stats */ + return &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ +} + +/** + * ixgbe_update_stats - Update the board statistics counters. + * @adapter: board private structure + **/ +void ixgbe_update_stats(struct ixgbe_adapter *adapter) +{ +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &adapter->netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + u64 total_mpc = 0; + u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; + u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; + u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; + u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; +#ifndef IXGBE_NO_LRO + u32 flushed = 0, coal = 0; + int num_q_vectors = 1; +#endif +#ifdef IXGBE_FCOE + struct ixgbe_fcoe *fcoe = &adapter->fcoe; + unsigned int cpu; + u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0; +#endif /* IXGBE_FCOE */ + + printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n", + adapter->num_tx_queues, adapter->num_rx_queues); + + if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_RESETTING, &adapter->state)) + return; + +#ifndef IXGBE_NO_LRO + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + +#endif + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + u64 rsc_count = 0; + u64 rsc_flush = 0; + for (i = 0; i < adapter->num_rx_queues; i++) { + rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count; + rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush; + } + adapter->rsc_total_count = rsc_count; + adapter->rsc_total_flush = rsc_flush; + } + +#ifndef IXGBE_NO_LRO + for (i = 0; i < num_q_vectors; i++) { + struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; + if (!q_vector) + continue; + flushed += q_vector->lrolist.stats.flushed; + coal += q_vector->lrolist.stats.coal; + } + adapter->lro_stats.flushed = flushed; + adapter->lro_stats.coal = coal; + +#endif + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; + non_eop_descs += rx_ring->rx_stats.non_eop_descs; + alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; + alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; + hw_csum_rx_error += rx_ring->rx_stats.csum_err; + bytes += rx_ring->stats.bytes; + packets += rx_ring->stats.packets; + + } + adapter->non_eop_descs = non_eop_descs; + adapter->alloc_rx_page_failed = alloc_rx_page_failed; + adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; + adapter->hw_csum_rx_error = hw_csum_rx_error; + net_stats->rx_bytes = bytes; + net_stats->rx_packets = packets; + + bytes = 0; + packets = 0; + /* gather some stats to the adapter struct that are per queue */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; + restart_queue += tx_ring->tx_stats.restart_queue; + tx_busy += tx_ring->tx_stats.tx_busy; + bytes += tx_ring->stats.bytes; + packets += tx_ring->stats.packets; + } + adapter->restart_queue = restart_queue; + adapter->tx_busy = tx_busy; + net_stats->tx_bytes = bytes; + net_stats->tx_packets = packets; + + hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); + + /* 8 register reads */ + for (i = 0; i < 8; i++) { + /* for packet buffers not used, the register should read 0 */ + mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i)); + missed_rx += mpc; + hwstats->mpc[i] += mpc; + total_mpc += hwstats->mpc[i]; + hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); + hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + hwstats->pxonrxc[i] += + IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + hwstats->pxonrxc[i] += + IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); + break; + default: + break; + } + } + + /*16 register reads */ + for (i = 0; i < 16; i++) { + hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + if ((hw->mac.type == ixgbe_mac_82599EB) || + (hw->mac.type == ixgbe_mac_X540)) { + hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */ + hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */ + } + } + + hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); + /* work around hardware counting issue */ + hwstats->gprc -= missed_rx; + + ixgbe_update_xoff_received(adapter); + + /* 82598 hardware only has a 32 bit counter in the high register */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); + hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); + hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); + hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); + break; + case ixgbe_mac_X540: + /* OS2BMC stats are X540 only*/ + hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC); + hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC); + hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC); + hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC); + case ixgbe_mac_82599EB: + for (i = 0; i < 16; i++) + adapter->hw_rx_no_dma_resources += + IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL); + IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */ + hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL); + IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */ + hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL); + IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */ + hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); +#ifdef HAVE_TX_MQ + hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); + hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); + hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); + hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); + hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); + hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); + hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); + hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + /* Add up per cpu counters for total ddp aloc fail */ + if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) { + for_each_possible_cpu(cpu) { + fcoe_noddp_counts_sum += + *per_cpu_ptr(fcoe->pcpu_noddp, cpu); + fcoe_noddp_ext_buff_counts_sum += + *per_cpu_ptr(fcoe-> + pcpu_noddp_ext_buff, cpu); + } + } + hwstats->fcoe_noddp = fcoe_noddp_counts_sum; + hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum; + +#endif /* IXGBE_FCOE */ + break; + default: + break; + } + bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); + hwstats->bprc += bprc; + hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); + if (hw->mac.type == ixgbe_mac_82598EB) + hwstats->mprc -= bprc; + hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); + hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); + hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); + hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); + hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); + hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); + hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); + hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); + lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); + hwstats->lxontxc += lxon; + lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + hwstats->lxofftxc += lxoff; + hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); + hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); + /* + * 82598 errata - tx of flow control packets is included in tx counters + */ + xon_off_tot = lxon + lxoff; + hwstats->gptc -= xon_off_tot; + hwstats->mptc -= xon_off_tot; + hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN)); + hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); + hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); + hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); + hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); + hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); + hwstats->ptc64 -= xon_off_tot; + hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); + hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); + hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); + hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); + hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); + hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); + /* Fill out the OS statistics structure */ + net_stats->multicast = hwstats->mprc; + + /* Rx Errors */ + net_stats->rx_errors = hwstats->crcerrs + + hwstats->rlec; + net_stats->rx_dropped = 0; + net_stats->rx_length_errors = hwstats->rlec; + net_stats->rx_crc_errors = hwstats->crcerrs; + net_stats->rx_missed_errors = total_mpc; + + /* + * VF Stats Collection - skip while resetting because these + * are not clear on read and otherwise you'll sometimes get + * crazy values. + */ + if (!test_bit(__IXGBE_RESETTING, &adapter->state)) { + for (i = 0; i < adapter->num_vfs; i++) { + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), \ + adapter->vfinfo[i].last_vfstats.gprc, \ + adapter->vfinfo[i].vfstats.gprc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), \ + adapter->vfinfo[i].last_vfstats.gptc, \ + adapter->vfinfo[i].vfstats.gptc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), \ + IXGBE_PVFGORC_MSB(i), \ + adapter->vfinfo[i].last_vfstats.gorc, \ + adapter->vfinfo[i].vfstats.gorc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), \ + IXGBE_PVFGOTC_MSB(i), \ + adapter->vfinfo[i].last_vfstats.gotc, \ + adapter->vfinfo[i].vfstats.gotc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), \ + adapter->vfinfo[i].last_vfstats.mprc, \ + adapter->vfinfo[i].vfstats.mprc); + } + } +} + + +#ifdef NO_VNIC + +/** + * ixgbe_watchdog_update_link - update the link status + * @adapter - pointer to the device adapter structure + * @link_speed - pointer to a u32 to store the link_speed + **/ +static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 link_speed = adapter->link_speed; + bool link_up = adapter->link_up; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + + if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) + return; + + if (hw->mac.ops.check_link) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + } else { + /* always assume link is up, if no check link function */ + link_speed = IXGBE_LINK_SPEED_10GB_FULL; + link_up = true; + } + +#ifdef HAVE_DCBNL_IEEE + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + +#endif + if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) { + hw->mac.ops.fc_enable(hw); + //ixgbe_set_rx_drop_en(adapter); + } + + if (link_up || + time_after(jiffies, (adapter->link_check_timeout + + IXGBE_TRY_LINK_TIMEOUT))) { + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC); + IXGBE_WRITE_FLUSH(hw); + } + + adapter->link_up = link_up; + adapter->link_speed = link_speed; +} +#endif + + + +#ifdef NO_VNIC + +/** + * ixgbe_service_task - manages and runs subtasks + * @work: pointer to work_struct containing our data + **/ +static void ixgbe_service_task(struct work_struct *work) +{ + //struct ixgbe_adapter *adapter = container_of(work, + // struct ixgbe_adapter, + // service_task); + + //ixgbe_reset_subtask(adapter); + //ixgbe_sfp_detection_subtask(adapter); + //ixgbe_sfp_link_config_subtask(adapter); + //ixgbe_check_overtemp_subtask(adapter); + //ixgbe_watchdog_subtask(adapter); +#ifdef HAVE_TX_MQ + //ixgbe_fdir_reinit_subtask(adapter); +#endif + //ixgbe_check_hang_subtask(adapter); + + //ixgbe_service_event_complete(adapter); +} + + + + +#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ + IXGBE_TXD_CMD_RS) + + +/** + * ixgbe_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int ixgbe_set_mac(struct net_device *netdev, void *p) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + int ret; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ixgbe_del_mac_filter(adapter, hw->mac.addr, + adapter->num_vfs); + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + + /* set the correct pool for the new PF MAC address in entry 0 */ + ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, + adapter->num_vfs); + return ret > 0 ? 0 : ret; +} + + +/** + * ixgbe_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { +#ifdef ETHTOOL_OPS_COMPAT + case SIOCETHTOOL: + return ethtool_ioctl(ifr); +#endif + default: + return -EOPNOTSUPP; + } +} +#endif /* NO_VNIC */ + + +void ixgbe_do_reset(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + else + ixgbe_reset(adapter); +} + + + + + + +/** + * ixgbe_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ixgbe_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ixgbe_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +//static +int ixgbe_kni_probe(struct pci_dev *pdev, + struct net_device **lad_dev) +{ + size_t count; + struct net_device *netdev; + struct ixgbe_adapter *adapter = NULL; + struct ixgbe_hw *hw = NULL; + static int cards_found; + int i, err; + u16 offset; + u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl; + u32 etrack_id; + u16 build, major, patch; + char *info_string, *i_s_var; + u8 part_str[IXGBE_PBANUM_LENGTH]; + enum ixgbe_mac_type mac_type = ixgbe_mac_unknown; +#ifdef HAVE_TX_MQ + unsigned int indices = num_possible_cpus(); +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + u16 device_caps; +#endif + u16 wol_cap; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + +#ifdef NO_VNIC + err = pci_request_selected_regions(pdev, pci_select_bars(pdev, + IORESOURCE_MEM), ixgbe_driver_name); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "pci_request_selected_regions failed 0x%x\n", err); + goto err_pci_reg; + } +#endif + + /* + * The mac_type is needed before we have the adapter is set up + * so rather than maintain two devID -> MAC tables we dummy up + * an ixgbe_hw stuct and use ixgbe_set_mac_type. + */ + hw = vmalloc(sizeof(struct ixgbe_hw)); + if (!hw) { + pr_info("Unable to allocate memory for early mac " + "check\n"); + } else { + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + ixgbe_set_mac_type(hw); + mac_type = hw->mac.type; + vfree(hw); + } + +#ifdef NO_VNIC + /* + * Workaround of Silicon errata on 82598. Disable LOs in the PCI switch + * port to which the 82598 is connected to prevent duplicate + * completions caused by LOs. We need the mac type so that we only + * do this on 82598 devices, ixgbe_set_mac_type does this for us if + * we set it's device ID. + */ + if (mac_type == ixgbe_mac_82598EB) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); +#endif + +#ifdef HAVE_TX_MQ +#ifdef CONFIG_DCB +#ifdef HAVE_MQPRIO + indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS; +#else + indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES); +#endif /* HAVE_MQPRIO */ +#endif /* CONFIG_DCB */ + + if (mac_type == ixgbe_mac_82598EB) + indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES); + else + indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES); + +#ifdef IXGBE_FCOE + indices += min_t(unsigned int, num_possible_cpus(), + IXGBE_MAX_FCOE_INDICES); +#endif + netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); +#else /* HAVE_TX_MQ */ + netdev = alloc_etherdev(sizeof(struct ixgbe_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + //pci_set_drvdata(pdev, adapter); + + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; + +#ifdef HAVE_PCI_ERS + /* + * call save state here in standalone driver because it relies on + * adapter struct to exist, and needs to call netdev_priv + */ + pci_save_state(pdev); + +#endif + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) { + err = -EIO; + goto err_ioremap; + } + //ixgbe_assign_netdev_ops(netdev); + ixgbe_set_ethtool_ops(netdev); + + strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = ixgbe_sw_init(adapter); + if (err) + goto err_sw_init; + + /* Make it possible the adapter to be woken up via WOL */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); + break; + default: + break; + } + + /* + * check_options must be called before setup_link to set up + * hw->fc completely + */ + //ixgbe_check_options(adapter); + +#ifndef NO_VNIC + /* reset_hw fills in the perm_addr as well */ + hw->phy.reset_if_overtemp = true; + err = hw->mac.ops.reset_hw(hw); + hw->phy.reset_if_overtemp = false; + if (err == IXGBE_ERR_SFP_NOT_PRESENT && + hw->mac.type == ixgbe_mac_82598EB) { + err = 0; + } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + e_dev_err("failed to load because an unsupported SFP+ " + "module type was detected.\n"); + e_dev_err("Reload the driver after installing a supported " + "module.\n"); + goto err_sw_init; + } else if (err) { + e_dev_err("HW Init failed: %d\n", err); + goto err_sw_init; + } +#endif + + //if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + // ixgbe_probe_vf(adapter); + + +#ifdef MAX_SKB_FRAGS + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM; + +#ifdef NETIF_F_IPV6_CSUM + netdev->features |= NETIF_F_IPV6_CSUM; +#endif + +#ifdef NETIF_F_HW_VLAN_TX + netdev->features |= NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX; +#endif +#ifdef NETIF_F_TSO + netdev->features |= NETIF_F_TSO; +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; +#endif /* NETIF_F_TSO6 */ +#ifdef NETIF_F_RXHASH + netdev->features |= NETIF_F_RXHASH; +#endif /* NETIF_F_RXHASH */ + +#ifdef HAVE_NDO_SET_FEATURES + netdev->features |= NETIF_F_RXCSUM; + + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; + + /* give us the option of enabling RSC/LRO later */ +#ifdef IXGBE_NO_LRO + if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) +#endif + netdev->hw_features |= NETIF_F_LRO; + +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif /* NETIF_F_GRO */ +#endif + +#ifdef NETIF_F_HW_VLAN_TX + /* set this bit last since it cannot be part of hw_features */ + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + netdev->features |= NETIF_F_SCTP_CSUM; +#ifdef HAVE_NDO_SET_FEATURES + netdev->hw_features |= NETIF_F_SCTP_CSUM | + NETIF_F_NTUPLE; +#endif + break; + default: + break; + } + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6; + +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + /* + * If perfect filters were enabled in check_options(), enable them + * on the netdevice too. + */ + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) + netdev->features |= NETIF_F_NTUPLE; + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + /* clear n-tuple support in the netdev unconditionally */ + netdev->features &= ~NETIF_F_NTUPLE; + } + +#ifdef NETIF_F_RXHASH + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) + netdev->features &= ~NETIF_F_RXHASH; + +#endif /* NETIF_F_RXHASH */ + if (netdev->features & NETIF_F_LRO) { + if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && + ((adapter->rx_itr_setting == 1) || + (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + } else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) { +#ifdef IXGBE_NO_LRO + e_info(probe, "InterruptThrottleRate set too high, " + "disabling RSC\n"); +#else + e_info(probe, "InterruptThrottleRate set too high, " + "falling back to software LRO\n"); +#endif + } + } +#ifdef CONFIG_DCB + //netdev->dcbnl_ops = &dcbnl_ops; +#endif + +#ifdef IXGBE_FCOE +#ifdef NETIF_F_FSO + if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { + ixgbe_get_device_caps(hw, &device_caps); + if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) { + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; + adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; + e_info(probe, "FCoE offload feature is not available. " + "Disabling FCoE offload feature\n"); + } +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE + else { + adapter->flags |= IXGBE_FLAG_FCOE_ENABLED; + adapter->ring_feature[RING_F_FCOE].indices = + IXGBE_FCRETA_SIZE; + netdev->features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC | + NETIF_F_FCOE_MTU; + netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; + } +#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC | + NETIF_F_FCOE_MTU; +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + } +#endif /* NETIF_F_FSO */ +#endif /* IXGBE_FCOE */ + +#endif /* MAX_SKB_FRAGS */ + /* make sure the EEPROM is good */ + if (hw->eeprom.ops.validate_checksum && + (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) { + e_dev_err("The EEPROM Checksum Is Not Valid\n"); + err = -EIO; + goto err_sw_init; + } + + memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len); + + if (ixgbe_validate_mac_addr(netdev->perm_addr)) { + e_dev_err("invalid MAC address\n"); + err = -EIO; + goto err_sw_init; + } +#else + if (ixgbe_validate_mac_addr(netdev->dev_addr)) { + e_dev_err("invalid MAC address\n"); + err = -EIO; + goto err_sw_init; + } +#endif + memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, + netdev->addr_len); + adapter->mac_table[0].queue = adapter->num_vfs; + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); + + //setup_timer(&adapter->service_timer, &ixgbe_service_timer, + // (unsigned long) adapter); + + //INIT_WORK(&adapter->service_task, ixgbe_service_task); + //clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); + + //err = ixgbe_init_interrupt_scheme(adapter); + //if (err) + // goto err_sw_init; + + //adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + ixgbe_set_num_queues(adapter); + + adapter->wol = 0; + /* WOL not supported for all but the following */ + switch (pdev->device) { + case IXGBE_DEV_ID_82599_SFP: + /* Only these subdevice supports WOL */ + switch (pdev->subsystem_device) { + case IXGBE_SUBDEV_ID_82599_560FLR: + /* only support first port */ + if (hw->bus.func != 0) + break; + case IXGBE_SUBDEV_ID_82599_SFP: + adapter->wol = IXGBE_WUFC_MAG; + break; + } + break; + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + /* All except this subdevice support WOL */ + if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) + adapter->wol = IXGBE_WUFC_MAG; + break; + case IXGBE_DEV_ID_82599_KX4: + adapter->wol = IXGBE_WUFC_MAG; + break; + case IXGBE_DEV_ID_X540T: + /* Check eeprom to see if it is enabled */ + ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap); + wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; + + if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && + (hw->bus.func == 0))) + adapter->wol = IXGBE_WUFC_MAG; + break; + } + //device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + + /* + * Save off EEPROM version number and Option Rom version which + * together make a unique identify for the eeprom + */ + ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh); + ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl); + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + + ixgbe_read_eeprom(hw, 0x17, &offset); + + /* Make sure offset to SCSI block is valid */ + if (!(offset == 0x0) && !(offset == 0xffff)) { + ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh); + ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl); + + /* Only display Option Rom if exist */ + if (eeprom_cfg_blkl && eeprom_cfg_blkh) { + major = eeprom_cfg_blkl >> 8; + build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8); + patch = eeprom_cfg_blkh & 0x00ff; + + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x, %d.%d.%d", etrack_id, major, build, + patch); + } else { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x", etrack_id); + } + } else { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x", etrack_id); + } + + /* reset the hardware with the new settings */ + err = hw->mac.ops.start_hw(hw); + if (err == IXGBE_ERR_EEPROM_VERSION) { + /* We are running on a pre-production device, log a warning */ + e_dev_warn("This device is a pre-production adapter/LOM. " + "Please be aware there may be issues associated " + "with your hardware. If you are experiencing " + "problems please contact your Intel or hardware " + "representative who provided you with this " + "hardware.\n"); + } + /* pick up the PCI bus settings for reporting later */ + if (hw->mac.ops.get_bus_info) + hw->mac.ops.get_bus_info(hw); + + strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); + *lad_dev = netdev; + + adapter->netdev_registered = true; +#ifdef NO_VNIC + /* power down the optics */ + if ((hw->phy.multispeed_fiber) || + ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && + (hw->mac.type == ixgbe_mac_82599EB))) + ixgbe_disable_tx_laser(hw); + + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + /* keep stopping all the transmit queues for older kernels */ + netif_tx_stop_all_queues(netdev); +#endif + + /* print all messages at the end so that we use our eth%d name */ + /* print bus type/speed/width info */ + e_dev_info("(PCI Express:%s:%s) ", + (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : + hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : + "Unknown"), + (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" : + hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" : + hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : + "Unknown")); + + /* print the MAC address */ + for (i = 0; i < 6; i++) + pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + /* First try to read PBA as a string */ + err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH); + if (err) + strlcpy(part_str, "Unknown", sizeof(part_str)); + if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) + e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n", + hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str); + else + e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n", + hw->mac.type, hw->phy.type, part_str); + + if (((hw->bus.speed == ixgbe_bus_speed_2500) && + (hw->bus.width <= ixgbe_bus_width_pcie_x4)) || + (hw->bus.width <= ixgbe_bus_width_pcie_x2)) { + e_dev_warn("PCI-Express bandwidth available for this " + "card is not sufficient for optimal " + "performance.\n"); + e_dev_warn("For optimal performance a x8 PCI-Express " + "slot is required.\n"); + } + +#define INFO_STRING_LEN 255 + info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); + if (!info_string) { + e_err(probe, "allocation for info string failed\n"); + goto no_info_string; + } + count = 0; + i_s_var = info_string; + count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: "); + + i_s_var = info_string + count; + count += snprintf(i_s_var, (INFO_STRING_LEN - count), + "RxQ: %d TxQ: %d ", adapter->num_rx_queues, + adapter->num_tx_queues); + i_s_var = info_string + count; +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE "); + i_s_var = info_string + count; + } +#endif + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, + "FdirHash "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, + "FdirPerfect "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA "); + i_s_var = info_string + count; + } + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC "); + i_s_var = info_string + count; + } +#ifndef IXGBE_NO_LRO + else if (netdev->features & NETIF_F_LRO) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO "); + i_s_var = info_string + count; + } +#endif + + BUG_ON(i_s_var > (info_string + INFO_STRING_LEN)); + /* end features printing */ + e_info(probe, "%s\n", info_string); + kfree(info_string); +no_info_string: + + /* firmware requires blank driver version */ + ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF); + +#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN) + /* add san mac addr to netdev */ + //ixgbe_add_sanmac_netdev(netdev); + +#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */ + e_info(probe, "Intel(R) 10 Gigabit Network Connection\n"); + cards_found++; + +#ifdef IXGBE_SYSFS + //if (ixgbe_sysfs_init(adapter)) + // e_err(probe, "failed to allocate sysfs resources\n"); +#else +#ifdef IXGBE_PROCFS + //if (ixgbe_procfs_init(adapter)) + // e_err(probe, "failed to allocate procfs resources\n"); +#endif /* IXGBE_PROCFS */ +#endif /* IXGBE_SYSFS */ + + return 0; + +//err_register: + //ixgbe_clear_interrupt_scheme(adapter); + //ixgbe_release_hw_control(adapter); +err_sw_init: + adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; + if (adapter->mac_table) + kfree(adapter->mac_table); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: + //pci_release_selected_regions(pdev, + // pci_select_bars(pdev, IORESOURCE_MEM)); +//err_pci_reg: +//err_dma: + pci_disable_device(pdev); + return err; +} + +/** + * ixgbe_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ixgbe_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. + **/ +void ixgbe_kni_remove(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} + + +u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg) +{ + u16 value; + struct ixgbe_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, &value); + return value; +} + +void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value) +{ + struct ixgbe_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, value); +} + +void ewarn(struct ixgbe_hw *hw, const char *st, u32 status) +{ + struct ixgbe_adapter *adapter = hw->back; + + netif_warn(adapter, drv, adapter->netdev, "%s", st); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h new file mode 100644 index 00000000..124f00de --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h @@ -0,0 +1,105 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_MBX_H_ +#define _IXGBE_MBX_H_ + +#include "ixgbe_type.h" + +#define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ +#define IXGBE_ERR_MBX -100 + +#define IXGBE_VFMAILBOX 0x002FC +#define IXGBE_VFMBMEM 0x00200 + +/* Define mailbox register bits */ +#define IXGBE_VFMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */ +#define IXGBE_VFMAILBOX_ACK 0x00000002 /* Ack PF message received */ +#define IXGBE_VFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define IXGBE_VFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define IXGBE_VFMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */ +#define IXGBE_VFMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */ +#define IXGBE_VFMAILBOX_RSTI 0x00000040 /* PF has reset indication */ +#define IXGBE_VFMAILBOX_RSTD 0x00000080 /* PF has indicated reset done */ +#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */ + +#define IXGBE_PFMAILBOX_STS 0x00000001 /* Initiate message send to VF */ +#define IXGBE_PFMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ +#define IXGBE_PFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define IXGBE_PFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define IXGBE_PFMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ + +#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */ +#define IXGBE_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ +#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */ +#define IXGBE_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ + + +/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the + * PF. The reverse is true if it is IXGBE_PF_*. + * Message ACK's are the value or'd with 0xF0000000 + */ +#define IXGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with + * this are the ACK */ +#define IXGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with + * this are the NACK */ +#define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still + * clear to send requests */ +#define IXGBE_VT_MSGINFO_SHIFT 16 +/* bits 23:16 are used for extra info for certain messages */ +#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) + +#define IXGBE_VF_RESET 0x01 /* VF requests reset */ +#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ +#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ +#define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* length of permanent address message returned from PF */ +#define IXGBE_VF_PERMADDR_MSG_LEN 4 +/* word in permanent address message with the current multicast type */ +#define IXGBE_VF_MC_TYPE_WORD 3 + +#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */ + + +#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ +#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ + +s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16); +s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16); +s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16); +void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw); +void ixgbe_init_mbx_params_vf(struct ixgbe_hw *); +void ixgbe_init_mbx_params_pf(struct ixgbe_hw *); + +#endif /* _IXGBE_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h new file mode 100644 index 00000000..d161600b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h @@ -0,0 +1,132 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* glue for the OS independent part of ixgbe + * includes register access macros + */ + +#ifndef _IXGBE_OSDEP_H_ +#define _IXGBE_OSDEP_H_ + +#include +#include +#include +#include +#include +#include "kcompat.h" + + +#ifndef msleep +#define msleep(x) do { if (in_interrupt()) { \ + /* Don't mdelay in interrupt context! */ \ + BUG(); \ + } else { \ + msleep(x); \ + } } while (0) + +#endif + +#undef ASSERT + +#ifdef DBG +#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A) +#else +#define hw_dbg(hw, S, A...) do {} while (0) +#endif + +#define e_dev_info(format, arg...) \ + dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_warn(format, arg...) \ + dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_err(format, arg...) \ + dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_notice(format, arg...) \ + dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_info(msglvl, format, arg...) \ + netif_info(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_err(msglvl, format, arg...) \ + netif_err(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_warn(msglvl, format, arg...) \ + netif_warn(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_crit(msglvl, format, arg...) \ + netif_crit(adapter, msglvl, adapter->netdev, format, ## arg) + + +#ifdef DBG +#define IXGBE_WRITE_REG(a, reg, value) do {\ + switch (reg) { \ + case IXGBE_EIMS: \ + case IXGBE_EIMC: \ + case IXGBE_EIAM: \ + case IXGBE_EIAC: \ + case IXGBE_EICR: \ + case IXGBE_EICS: \ + printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \ + reg, (u32)(value)); \ + default: \ + break; \ + } \ + writel((value), ((a)->hw_addr + (reg))); \ +} while (0) +#else +#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#endif + +#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg)) + +#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \ + writel((value), ((a)->hw_addr + (reg) + ((offset) << 2)))) + +#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \ + readl((a)->hw_addr + (reg) + ((offset) << 2))) + +#ifndef writeq +#define writeq(val, addr) do { writel((u32) (val), addr); \ + writel((u32) (val >> 32), (addr + 4)); \ + } while (0); +#endif + +#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) + +#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS) +struct ixgbe_hw; +extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg); +extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value); +extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status); + +#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word +#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word +#define IXGBE_EEPROM_GRANT_ATTEMPS 100 +#define IXGBE_HTONL(_i) htonl(_i) +#define IXGBE_NTOHL(_i) ntohl(_i) +#define IXGBE_NTOHS(_i) ntohs(_i) +#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i) +#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i) +#define EWARN(H, W, S) ewarn(H, W, S) + +#endif /* _IXGBE_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c new file mode 100644 index 00000000..e3f5275e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c @@ -0,0 +1,1847 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static void ixgbe_i2c_start(struct ixgbe_hw *hw); +static void ixgbe_i2c_stop(struct ixgbe_hw *hw); +static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data); +static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data); +static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw); +static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data); +static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data); +static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); +static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); +static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data); +static bool ixgbe_get_i2c_data(u32 *i2cctl); + +/** + * ixgbe_init_phy_ops_generic - Inits PHY function ptrs + * @hw: pointer to the hardware structure + * + * Initialize the function pointers. + **/ +s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_phy_info *phy = &hw->phy; + + /* PHY */ + phy->ops.identify = &ixgbe_identify_phy_generic; + phy->ops.reset = &ixgbe_reset_phy_generic; + phy->ops.read_reg = &ixgbe_read_phy_reg_generic; + phy->ops.write_reg = &ixgbe_write_phy_reg_generic; + phy->ops.setup_link = &ixgbe_setup_phy_link_generic; + phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic; + phy->ops.check_link = NULL; + phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic; + phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic; + phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic; + phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic; + phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic; + phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear; + phy->ops.identify_sfp = &ixgbe_identify_module_generic; + phy->sfp_type = ixgbe_sfp_type_unknown; + phy->ops.check_overtemp = &ixgbe_tn_check_overtemp; + return 0; +} + +/** + * ixgbe_identify_phy_generic - Get physical layer module + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + **/ +s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + u32 phy_addr; + u16 ext_ability = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_validate_phy_addr(hw, phy_addr)) { + hw->phy.addr = phy_addr; + ixgbe_get_phy_id(hw); + hw->phy.type = + ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &ext_ability); + if (ext_ability & + (IXGBE_MDIO_PHY_10GBASET_ABILITY | + IXGBE_MDIO_PHY_1000BASET_ABILITY)) + hw->phy.type = + ixgbe_phy_cu_unknown; + else + hw->phy.type = + ixgbe_phy_generic; + } + + status = 0; + break; + } + } + /* clear value if nothing found */ + if (status != 0) + hw->phy.addr = 0; + } else { + status = 0; + } + + return status; +} + +/** + * ixgbe_validate_phy_addr - Determines phy address is valid + * @hw: pointer to hardware structure + * + **/ +bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr) +{ + u16 phy_id = 0; + bool valid = false; + + hw->phy.addr = phy_addr; + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id); + + if (phy_id != 0xFFFF && phy_id != 0x0) + valid = true; + + return valid; +} + +/** + * ixgbe_get_phy_id - Get the phy type + * @hw: pointer to hardware structure + * + **/ +s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) +{ + u32 status; + u16 phy_id_high = 0; + u16 phy_id_low = 0; + + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &phy_id_high); + + if (status == 0) { + hw->phy.id = (u32)(phy_id_high << 16); + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &phy_id_low); + hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK); + hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK); + } + return status; +} + +/** + * ixgbe_get_phy_type_from_id - Get the phy type + * @hw: pointer to hardware structure + * + **/ +enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) +{ + enum ixgbe_phy_type phy_type; + + switch (phy_id) { + case TN1010_PHY_ID: + phy_type = ixgbe_phy_tn; + break; + case X540_PHY_ID: + phy_type = ixgbe_phy_aq; + break; + case QT2022_PHY_ID: + phy_type = ixgbe_phy_qt; + break; + case ATH_PHY_ID: + phy_type = ixgbe_phy_nl; + break; + default: + phy_type = ixgbe_phy_unknown; + break; + } + + hw_dbg(hw, "phy type found is %d\n", phy_type); + return phy_type; +} + +/** + * ixgbe_reset_phy_generic - Performs a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) +{ + u32 i; + u16 ctrl = 0; + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) + status = ixgbe_identify_phy_generic(hw); + + if (status != 0 || hw->phy.type == ixgbe_phy_none) + goto out; + + /* Don't reset PHY if it's shut down due to overtemp. */ + if (!hw->phy.reset_if_overtemp && + (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) + goto out; + + /* + * Perform soft PHY reset to the PHY_XS. + * This will cause a soft reset to the PHY + */ + hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + IXGBE_MDIO_PHY_XS_RESET); + + /* + * Poll for reset bit to self-clear indicating reset is complete. + * Some PHYs could take up to 3 seconds to complete and need about + * 1.7 usec delay after the reset is complete. + */ + for (i = 0; i < 30; i++) { + msleep(100); + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); + if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { + udelay(2); + break; + } + } + + if (ctrl & IXGBE_MDIO_PHY_XS_RESET) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "PHY reset polling failed to complete.\n"); + } + +out: + return status; +} + +/** + * ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit address of PHY register to read + * @phy_data: Pointer to read data from PHY register + **/ +s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 *phy_data) +{ + u32 command; + u32 i; + u32 data; + s32 status = 0; + u16 gssr; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + gssr = IXGBE_GSSR_PHY1_SM; + else + gssr = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + /* Setup and write the address cycle command */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle completed. + * The MDI Command bit will clear when the operation is + * complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address command did not complete.\n"); + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + /* + * Address cycle complete, setup and write the read + * command + */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle + * completed. The MDI Command bit will clear when the + * operation is complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY read command didn't complete\n"); + status = IXGBE_ERR_PHY; + } else { + /* + * Read operation is complete. Get the data + * from MSRWD + */ + data = IXGBE_READ_REG(hw, IXGBE_MSRWD); + data >>= IXGBE_MSRWD_READ_DATA_SHIFT; + *phy_data = (u16)(data); + } + } + + hw->mac.ops.release_swfw_sync(hw, gssr); + } + + return status; +} + +/** + * ixgbe_write_phy_reg_generic - Writes a value to specified PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit PHY register to write + * @device_type: 5 bit device type + * @phy_data: Data to write to the PHY register + **/ +s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 phy_data) +{ + u32 command; + u32 i; + s32 status = 0; + u16 gssr; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + gssr = IXGBE_GSSR_PHY1_SM; + else + gssr = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + /* Put the data in the MDI single read and write data register*/ + IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data); + + /* Setup and write the address cycle command */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle completed. + * The MDI Command bit will clear when the operation is + * complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address cmd didn't complete\n"); + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + /* + * Address cycle complete, setup and write the write + * command + */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle + * completed. The MDI Command bit will clear when the + * operation is complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address cmd didn't complete\n"); + status = IXGBE_ERR_PHY; + } + } + + hw->mac.ops.release_swfw_sync(hw, gssr); + } + + return status; +} + +/** + * ixgbe_setup_phy_link_generic - Set and restart autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; + bool autoneg = false; + ixgbe_link_speed speed; + + ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + /* Set or unset auto-negotiation 1G advertisement */ + hw->phy.ops.read_reg(hw, + IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, + IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_100_FULL) { + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | + IXGBE_MII_100BASE_T_ADVERTISE_HALF); + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + /* Restart PHY autonegotiation and wait for completion */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); + + autoneg_reg |= IXGBE_MII_RESTART; + + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); + + /* Wait for autonegotiation to finish */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + /* Restart PHY autonegotiation and wait for completion */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; + if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) + break; + } + + if (time_out == max_time_out) { + status = IXGBE_ERR_LINK_SETUP; + hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out"); + } + + return status; +} + +/** + * ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + **/ +s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + + /* + * Clear autoneg_advertised and set new values based on input link + * speed. + */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (speed & IXGBE_LINK_SPEED_100_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + + /* Setup link based on the new speed settings */ + hw->phy.ops.setup_link(hw); + + return 0; +} + +/** + * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: boolean auto-negotiation value + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + s32 status = IXGBE_ERR_LINK_SETUP; + u16 speed_ability; + + *speed = 0; + *autoneg = true; + + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &speed_ability); + + if (status == 0) { + if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M) + *speed |= IXGBE_LINK_SPEED_100_FULL; + } + + return status; +} + +/** + * ixgbe_check_phy_link_tnx - Determine link and speed status + * @hw: pointer to hardware structure + * + * Reads the VS1 register to determine if link is up and the current speed for + * the PHY. + **/ +s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 phy_link = 0; + u16 phy_speed = 0; + u16 phy_data = 0; + + /* Initialize speed and link to default case */ + *link_up = false; + *speed = IXGBE_LINK_SPEED_10GB_FULL; + + /* + * Check current speed and link status of the PHY register. + * This is a vendor specific register and may have to + * be changed for other copper PHYs. + */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + &phy_data); + phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS; + phy_speed = phy_data & + IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS; + if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) { + *link_up = true; + if (phy_speed == + IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS) + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + } + } + + return status; +} + +/** + * ixgbe_setup_phy_link_tnx - Set and restart autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; + bool autoneg = false; + ixgbe_link_speed speed; + + ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + /* Set or unset auto-negotiation 1G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_100_FULL) { + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + /* Restart PHY autonegotiation and wait for completion */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); + + autoneg_reg |= IXGBE_MII_RESTART; + + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); + + /* Wait for autonegotiation to finish */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + /* Restart PHY autonegotiation and wait for completion */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; + if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) + break; + } + + if (time_out == max_time_out) { + status = IXGBE_ERR_LINK_SETUP; + hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out"); + } + + return status; +} + +/** + * ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + **/ +s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, + u16 *firmware_version) +{ + s32 status = 0; + + status = hw->phy.ops.read_reg(hw, TNX_FW_REV, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + firmware_version); + + return status; +} + +/** + * ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + **/ +s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, + u16 *firmware_version) +{ + s32 status = 0; + + status = hw->phy.ops.read_reg(hw, AQ_FW_REV, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + firmware_version); + + return status; +} + +/** + * ixgbe_reset_phy_nl - Performs a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) +{ + u16 phy_offset, control, eword, edata, block_crc; + bool end_data = false; + u16 list_offset, data_offset; + u16 phy_data = 0; + s32 ret_val = 0; + u32 i; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); + + /* reset the PHY and poll for completion */ + hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + (phy_data | IXGBE_MDIO_PHY_XS_RESET)); + + for (i = 0; i < 100; i++) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); + if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0) + break; + msleep(10); + } + + if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) { + hw_dbg(hw, "PHY reset did not complete.\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + + /* Get init offsets */ + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, + &data_offset); + if (ret_val != 0) + goto out; + + ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc); + data_offset++; + while (!end_data) { + /* + * Read control word from PHY init contents offset + */ + ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); + control = (eword & IXGBE_CONTROL_MASK_NL) >> + IXGBE_CONTROL_SHIFT_NL; + edata = eword & IXGBE_DATA_MASK_NL; + switch (control) { + case IXGBE_DELAY_NL: + data_offset++; + hw_dbg(hw, "DELAY: %d MS\n", edata); + msleep(edata); + break; + case IXGBE_DATA_NL: + hw_dbg(hw, "DATA:\n"); + data_offset++; + hw->eeprom.ops.read(hw, data_offset++, + &phy_offset); + for (i = 0; i < edata; i++) { + hw->eeprom.ops.read(hw, data_offset, &eword); + hw->phy.ops.write_reg(hw, phy_offset, + IXGBE_TWINAX_DEV, eword); + hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword, + phy_offset); + data_offset++; + phy_offset++; + } + break; + case IXGBE_CONTROL_NL: + data_offset++; + hw_dbg(hw, "CONTROL:\n"); + if (edata == IXGBE_CONTROL_EOL_NL) { + hw_dbg(hw, "EOL\n"); + end_data = true; + } else if (edata == IXGBE_CONTROL_SOL_NL) { + hw_dbg(hw, "SOL\n"); + } else { + hw_dbg(hw, "Bad control value\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + break; + default: + hw_dbg(hw, "Bad control type\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + } + +out: + return ret_val; +} + +/** + * ixgbe_identify_module_generic - Identifies module type + * @hw: pointer to hardware structure + * + * Determines HW type and calls appropriate function. + **/ +s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_SFP_NOT_PRESENT; + + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + status = ixgbe_identify_sfp_module_generic(hw); + break; + + case ixgbe_media_type_fiber_qsfp: + status = ixgbe_identify_qsfp_module_generic(hw); + break; + + default: + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + break; + } + + return status; +} + +/** + * ixgbe_identify_sfp_module_generic - Identifies SFP modules + * @hw: pointer to hardware structure + * + * Searches for and identifies the SFP module and assigns appropriate PHY type. + **/ +s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + u32 vendor_oui = 0; + enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type; + u8 identifier = 0; + u8 comp_codes_1g = 0; + u8 comp_codes_10g = 0; + u8 oui_bytes[3] = {0, 0, 0}; + u8 cable_tech = 0; + u8 cable_spec = 0; + u16 enforce_sfp = 0; + + if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + goto out; + } + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_IDENTIFIER, + &identifier); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + /* LAN ID is needed for sfp_type determination */ + hw->mac.ops.set_lan_id(hw); + + if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + } else { + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_1GBE_COMP_CODES, + &comp_codes_1g); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_10GBE_COMP_CODES, + &comp_codes_10g); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_CABLE_TECHNOLOGY, + &cable_tech); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + /* ID Module + * ========= + * 0 SFP_DA_CU + * 1 SFP_SR + * 2 SFP_LR + * 3 SFP_DA_CORE0 - 82599-specific + * 4 SFP_DA_CORE1 - 82599-specific + * 5 SFP_SR/LR_CORE0 - 82599-specific + * 6 SFP_SR/LR_CORE1 - 82599-specific + * 7 SFP_act_lmt_DA_CORE0 - 82599-specific + * 8 SFP_act_lmt_DA_CORE1 - 82599-specific + * 9 SFP_1g_cu_CORE0 - 82599-specific + * 10 SFP_1g_cu_CORE1 - 82599-specific + * 11 SFP_1g_sx_CORE0 - 82599-specific + * 12 SFP_1g_sx_CORE1 - 82599-specific + */ + if (hw->mac.type == ixgbe_mac_82598EB) { + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.sfp_type = ixgbe_sfp_type_da_cu; + else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) + hw->phy.sfp_type = ixgbe_sfp_type_sr; + else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) + hw->phy.sfp_type = ixgbe_sfp_type_lr; + else + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + } else if (hw->mac.type == ixgbe_mac_82599EB) { + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_cu_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_cu_core1; + } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) { + hw->phy.ops.read_i2c_eeprom( + hw, IXGBE_SFF_CABLE_SPEC_COMP, + &cable_spec); + if (cable_spec & + IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core1; + } else { + hw->phy.sfp_type = + ixgbe_sfp_type_unknown; + } + } else if (comp_codes_10g & + (IXGBE_SFF_10GBASESR_CAPABLE | + IXGBE_SFF_10GBASELR_CAPABLE)) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_srlr_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_srlr_core1; + } else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_cu_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_cu_core1; + } else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_sx_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_sx_core1; + } else { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + } + } + + if (hw->phy.sfp_type != stored_sfp_type) + hw->phy.sfp_setup_needed = true; + + /* Determine if the SFP+ PHY is dual speed or not. */ + hw->phy.multispeed_fiber = false; + if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) && + (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) || + ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) && + (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE))) + hw->phy.multispeed_fiber = true; + + /* Determine PHY vendor */ + if (hw->phy.type != ixgbe_phy_nl) { + hw->phy.id = identifier; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE0, + &oui_bytes[0]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE1, + &oui_bytes[1]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE2, + &oui_bytes[2]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + vendor_oui = + ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) | + (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) | + (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT)); + + switch (vendor_oui) { + case IXGBE_SFF_VENDOR_OUI_TYCO: + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_passive_tyco; + break; + case IXGBE_SFF_VENDOR_OUI_FTL: + if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = ixgbe_phy_sfp_ftl_active; + else + hw->phy.type = ixgbe_phy_sfp_ftl; + break; + case IXGBE_SFF_VENDOR_OUI_AVAGO: + hw->phy.type = ixgbe_phy_sfp_avago; + break; + case IXGBE_SFF_VENDOR_OUI_INTEL: + hw->phy.type = ixgbe_phy_sfp_intel; + break; + default: + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_passive_unknown; + else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_active_unknown; + else + hw->phy.type = ixgbe_phy_sfp_unknown; + break; + } + } + + /* Allow any DA cable vendor */ + if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE | + IXGBE_SFF_DA_ACTIVE_CABLE)) { + status = 0; + goto out; + } + + /* Verify supported 1G SFP modules */ + if (comp_codes_10g == 0 && + !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + + /* Anything else 82598-based is supported */ + if (hw->mac.type == ixgbe_mac_82598EB) { + status = 0; + goto out; + } + + ixgbe_get_device_caps(hw, &enforce_sfp); + if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) && + !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) { + /* Make sure we're a supported PHY type */ + if (hw->phy.type == ixgbe_phy_sfp_intel) { + status = 0; + } else { + if (hw->allow_unsupported_sfp == true) { + EWARN(hw, "WARNING: Intel (R) Network " + "Connections are quality tested " + "using Intel (R) Ethernet Optics." + " Using untested modules is not " + "supported and may cause unstable" + " operation or damage to the " + "module or the adapter. Intel " + "Corporation is not responsible " + "for any harm caused by using " + "untested modules.\n", status); + status = 0; + } else { + hw_dbg(hw, "SFP+ module not supported\n"); + hw->phy.type = + ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + } + } + } else { + status = 0; + } + } + +out: + return status; + +err_read_i2c_eeprom: + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + if (hw->phy.type != ixgbe_phy_nl) { + hw->phy.id = 0; + hw->phy.type = ixgbe_phy_unknown; + } + return IXGBE_ERR_SFP_NOT_PRESENT; +} + +/** + * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules + * @hw: pointer to hardware structure + * + * Searches for and identifies the QSFP module and assigns appropriate PHY type + **/ +s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + } + + return status; +} + + +/** + * ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence + * @hw: pointer to hardware structure + * @list_offset: offset to the SFP ID list + * @data_offset: offset to the SFP data block + * + * Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if + * so it returns the offsets to the phy init sequence block. + **/ +s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, + u16 *list_offset, + u16 *data_offset) +{ + u16 sfp_id; + u16 sfp_type = hw->phy.sfp_type; + + if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) + return IXGBE_ERR_SFP_NOT_SUPPORTED; + + if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) + return IXGBE_ERR_SFP_NOT_PRESENT; + + if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && + (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) + return IXGBE_ERR_SFP_NOT_SUPPORTED; + + /* + * Limiting active cables and 1G Phys must be initialized as + * SR modules + */ + if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || + sfp_type == ixgbe_sfp_type_1g_cu_core0 || + sfp_type == ixgbe_sfp_type_1g_sx_core0) + sfp_type = ixgbe_sfp_type_srlr_core0; + else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || + sfp_type == ixgbe_sfp_type_1g_cu_core1 || + sfp_type == ixgbe_sfp_type_1g_sx_core1) + sfp_type = ixgbe_sfp_type_srlr_core1; + + /* Read offset to PHY init contents */ + hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset); + + if ((!*list_offset) || (*list_offset == 0xFFFF)) + return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + + /* Shift offset to first ID word */ + (*list_offset)++; + + /* + * Find the matching SFP ID in the EEPROM + * and program the init sequence + */ + hw->eeprom.ops.read(hw, *list_offset, &sfp_id); + + while (sfp_id != IXGBE_PHY_INIT_END_NL) { + if (sfp_id == sfp_type) { + (*list_offset)++; + hw->eeprom.ops.read(hw, *list_offset, data_offset); + if ((!*data_offset) || (*data_offset == 0xFFFF)) { + hw_dbg(hw, "SFP+ module not supported\n"); + return IXGBE_ERR_SFP_NOT_SUPPORTED; + } else { + break; + } + } else { + (*list_offset) += 2; + if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id)) + return IXGBE_ERR_PHY; + } + } + + if (sfp_id == IXGBE_PHY_INIT_END_NL) { + hw_dbg(hw, "No matching SFP+ module found\n"); + return IXGBE_ERR_SFP_NOT_SUPPORTED; + } + + return 0; +} + +/** + * ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) +{ + return hw->phy.ops.read_i2c_byte(hw, byte_offset, + IXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +} + +/** + * ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to write + * @eeprom_data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data) +{ + return hw->phy.ops.write_i2c_byte(hw, byte_offset, + IXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +} + +/** + * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + s32 status = 0; + u32 max_retry = 10; + u32 retry = 0; + u16 swfw_mask = 0; + bool nack = 1; + *data = 0; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + swfw_mask = IXGBE_GSSR_PHY1_SM; + else + swfw_mask = IXGBE_GSSR_PHY0_SM; + + do { + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != 0) { + status = IXGBE_ERR_SWFW_SYNC; + goto read_byte_out; + } + + ixgbe_i2c_start(hw); + + /* Device Address and write indication */ + status = ixgbe_clock_out_i2c_byte(hw, dev_addr); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, byte_offset); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + ixgbe_i2c_start(hw); + + /* Device Address and read indication */ + status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1)); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_in_i2c_byte(hw, data); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_bit(hw, nack); + if (status != 0) + goto fail; + + ixgbe_i2c_stop(hw); + break; + +fail: + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + msleep(100); + ixgbe_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + hw_dbg(hw, "I2C byte read error - Retrying.\n"); + else + hw_dbg(hw, "I2C byte read error.\n"); + + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +read_byte_out: + return status; +} + +/** + * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + s32 status = 0; + u32 max_retry = 1; + u32 retry = 0; + u16 swfw_mask = 0; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + swfw_mask = IXGBE_GSSR_PHY1_SM; + else + swfw_mask = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) { + status = IXGBE_ERR_SWFW_SYNC; + goto write_byte_out; + } + + do { + ixgbe_i2c_start(hw); + + status = ixgbe_clock_out_i2c_byte(hw, dev_addr); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, byte_offset); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, data); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + ixgbe_i2c_stop(hw); + break; + +fail: + ixgbe_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + hw_dbg(hw, "I2C byte write error - Retrying.\n"); + else + hw_dbg(hw, "I2C byte write error.\n"); + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +write_byte_out: + return status; +} + +/** + * ixgbe_i2c_start - Sets I2C start condition + * @hw: pointer to hardware structure + * + * Sets I2C start condition (High -> Low on SDA while SCL is High) + **/ +static void ixgbe_i2c_start(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + /* Start condition must begin with data and clock high */ + ixgbe_set_i2c_data(hw, &i2cctl, 1); + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for start condition (4.7us) */ + udelay(IXGBE_I2C_T_SU_STA); + + ixgbe_set_i2c_data(hw, &i2cctl, 0); + + /* Hold time for start condition (4us) */ + udelay(IXGBE_I2C_T_HD_STA); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + +} + +/** + * ixgbe_i2c_stop - Sets I2C stop condition + * @hw: pointer to hardware structure + * + * Sets I2C stop condition (Low -> High on SDA while SCL is High) + **/ +static void ixgbe_i2c_stop(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + /* Stop condition must begin with data low and clock high */ + ixgbe_set_i2c_data(hw, &i2cctl, 0); + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for stop condition (4us) */ + udelay(IXGBE_I2C_T_SU_STO); + + ixgbe_set_i2c_data(hw, &i2cctl, 1); + + /* bus free time between stop and start (4.7us)*/ + udelay(IXGBE_I2C_T_BUF); +} + +/** + * ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte to clock in + * + * Clocks in one byte data via I2C data/clock + **/ +static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data) +{ + s32 i; + bool bit = 0; + + for (i = 7; i >= 0; i--) { + ixgbe_clock_in_i2c_bit(hw, &bit); + *data |= bit << i; + } + + return 0; +} + +/** + * ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte clocked out + * + * Clocks out one byte data via I2C data/clock + **/ +static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data) +{ + s32 status = 0; + s32 i; + u32 i2cctl; + bool bit = 0; + + for (i = 7; i >= 0; i--) { + bit = (data >> i) & 0x1; + status = ixgbe_clock_out_i2c_bit(hw, bit); + + if (status != 0) + break; + } + + /* Release SDA line (set high) */ + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + i2cctl |= IXGBE_I2C_DATA_OUT; + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl); + IXGBE_WRITE_FLUSH(hw); + + return status; +} + +/** + * ixgbe_get_i2c_ack - Polls for I2C ACK + * @hw: pointer to hardware structure + * + * Clocks in/out one bit via I2C data/clock + **/ +static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 i = 0; + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + u32 timeout = 10; + bool ack = 1; + + ixgbe_raise_i2c_clk(hw, &i2cctl); + + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + /* Poll for ACK. Note that ACK in I2C spec is + * transition from 1 to 0 */ + for (i = 0; i < timeout; i++) { + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + ack = ixgbe_get_i2c_data(&i2cctl); + + udelay(1); + if (ack == 0) + break; + } + + if (ack == 1) { + hw_dbg(hw, "I2C ack was not received.\n"); + status = IXGBE_ERR_I2C; + } + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + + return status; +} + +/** + * ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: read data value + * + * Clocks in one bit via I2C data/clock + **/ +static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + *data = ixgbe_get_i2c_data(&i2cctl); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + + return 0; +} + +/** + * ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: data value to write + * + * Clocks out one bit via I2C data/clock + **/ +static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) +{ + s32 status; + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + status = ixgbe_set_i2c_data(hw, &i2cctl, data); + if (status == 0) { + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us. + * This also takes care of the data hold time. + */ + udelay(IXGBE_I2C_T_LOW); + } else { + status = IXGBE_ERR_I2C; + hw_dbg(hw, "I2C data was not set to %X\n", data); + } + + return status; +} +/** + * ixgbe_raise_i2c_clk - Raises the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Raises the I2C clock line '0'->'1' + **/ +static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) +{ + u32 i = 0; + u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT; + u32 i2cctl_r = 0; + + for (i = 0; i < timeout; i++) { + *i2cctl |= IXGBE_I2C_CLK_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + /* SCL rise time (1000ns) */ + udelay(IXGBE_I2C_T_RISE); + + i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + if (i2cctl_r & IXGBE_I2C_CLK_IN) + break; + } +} + +/** + * ixgbe_lower_i2c_clk - Lowers the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Lowers the I2C clock line '1'->'0' + **/ +static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) +{ + + *i2cctl &= ~IXGBE_I2C_CLK_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + + /* SCL fall time (300ns) */ + udelay(IXGBE_I2C_T_FALL); +} + +/** + * ixgbe_set_i2c_data - Sets the I2C data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * @data: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + **/ +static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) +{ + s32 status = 0; + + if (data) + *i2cctl |= IXGBE_I2C_DATA_OUT; + else + *i2cctl &= ~IXGBE_I2C_DATA_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + + /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ + udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA); + + /* Verify data was set correctly */ + *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + if (data != ixgbe_get_i2c_data(i2cctl)) { + status = IXGBE_ERR_I2C; + hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); + } + + return status; +} + +/** + * ixgbe_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + **/ +static bool ixgbe_get_i2c_data(u32 *i2cctl) +{ + bool data; + + if (*i2cctl & IXGBE_I2C_DATA_IN) + data = 1; + else + data = 0; + + return data; +} + +/** + * ixgbe_i2c_bus_clear - Clears the I2C bus + * @hw: pointer to hardware structure + * + * Clears the I2C bus by sending nine clock pulses. + * Used when data line is stuck low. + **/ +void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + u32 i; + + ixgbe_i2c_start(hw); + + ixgbe_set_i2c_data(hw, &i2cctl, 1); + + for (i = 0; i < 9; i++) { + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Min high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Min low period of clock is 4.7us*/ + udelay(IXGBE_I2C_T_LOW); + } + + ixgbe_i2c_start(hw); + + /* Put the i2c bus back to default state */ + ixgbe_i2c_stop(hw); +} + +/** + * ixgbe_tn_check_overtemp - Checks if an overtemp occurred. + * @hw: pointer to hardware structure + * + * Checks if the LASI temp alarm status was triggered due to overtemp + **/ +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 phy_data = 0; + + if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) + goto out; + + /* Check that the LASI temp alarm status was triggered */ + hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data); + + if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) + goto out; + + status = IXGBE_ERR_OVERTEMP; +out: + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h new file mode 100644 index 00000000..bbe5a9e3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h @@ -0,0 +1,137 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_PHY_H_ +#define _IXGBE_PHY_H_ + +#include "ixgbe_type.h" +#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0 + +/* EEPROM byte offsets */ +#define IXGBE_SFF_IDENTIFIER 0x0 +#define IXGBE_SFF_IDENTIFIER_SFP 0x3 +#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25 +#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26 +#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27 +#define IXGBE_SFF_1GBE_COMP_CODES 0x6 +#define IXGBE_SFF_10GBE_COMP_CODES 0x3 +#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 +#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C + +/* Bitmasks */ +#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 +#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8 +#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4 +#define IXGBE_SFF_1GBASESX_CAPABLE 0x1 +#define IXGBE_SFF_1GBASELX_CAPABLE 0x2 +#define IXGBE_SFF_1GBASET_CAPABLE 0x8 +#define IXGBE_SFF_10GBASESR_CAPABLE 0x10 +#define IXGBE_SFF_10GBASELR_CAPABLE 0x20 +#define IXGBE_I2C_EEPROM_READ_MASK 0x100 +#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3 +#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0 +#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1 +#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2 +#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3 + +/* Flow control defines */ +#define IXGBE_TAF_SYM_PAUSE 0x400 +#define IXGBE_TAF_ASM_PAUSE 0x800 + +/* Bit-shift macros */ +#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24 +#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16 +#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8 + +/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ +#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600 +#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500 +#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00 +#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100 + +/* I2C SDA and SCL timing parameters for standard mode */ +#define IXGBE_I2C_T_HD_STA 4 +#define IXGBE_I2C_T_LOW 5 +#define IXGBE_I2C_T_HIGH 4 +#define IXGBE_I2C_T_SU_STA 5 +#define IXGBE_I2C_T_HD_DATA 5 +#define IXGBE_I2C_T_SU_DATA 1 +#define IXGBE_I2C_T_RISE 1 +#define IXGBE_I2C_T_FALL 1 +#define IXGBE_I2C_T_SU_STO 4 +#define IXGBE_I2C_T_BUF 5 + +#define IXGBE_TN_LASI_STATUS_REG 0x9005 +#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008 + +s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw); +bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr); +enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id); +s32 ixgbe_get_phy_id(struct ixgbe_hw *hw); +s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw); +s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw); +s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 *phy_data); +s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 phy_data); +s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw); +s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); + +/* PHY specific */ +s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up); +s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw); +s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, + u16 *firmware_version); +s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, + u16 *firmware_version); + +s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); +s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, + u16 *list_offset, + u16 *data_offset); +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data); +s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data); +void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw); +#endif /* _IXGBE_PHY_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h new file mode 100644 index 00000000..5e3559fd --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h @@ -0,0 +1,73 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#ifndef _IXGBE_SRIOV_H_ +#define _IXGBE_SRIOV_H_ + +int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, + int entries, u16 *hash_list, u32 vf); +void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); +int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf); +void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe); +void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf); +void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf); +void ixgbe_msg_task(struct ixgbe_adapter *adapter); +int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, + int vf, unsigned char *mac_addr); +void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); +void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); +#ifdef IFLA_VF_MAX +int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, + u8 qos); +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); +#endif +int ixgbe_ndo_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivi); +#endif +void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); +#ifdef CONFIG_PCI_IOV +int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); +void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); +#endif +int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); +#ifdef IFLA_VF_MAX +void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); +#endif /* IFLA_VF_MAX */ +void ixgbe_dump_registers(struct ixgbe_adapter *adapter); + +/* + * These are defined in ixgbe_type.h on behalf of the VF driver + * but we need them here unwrapped for the PF driver. + */ +#define IXGBE_DEV_ID_82599_VF 0x10ED +#define IXGBE_DEV_ID_X540_VF 0x1515 + +#endif /* _IXGBE_SRIOV_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h new file mode 100644 index 00000000..6b21c879 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h @@ -0,0 +1,3254 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_TYPE_H_ +#define _IXGBE_TYPE_H_ + +#include "ixgbe_osdep.h" + + +/* Vendor ID */ +#define IXGBE_INTEL_VENDOR_ID 0x8086 + +/* Device IDs */ +#define IXGBE_DEV_ID_82598 0x10B6 +#define IXGBE_DEV_ID_82598_BX 0x1508 +#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 +#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 +#define IXGBE_DEV_ID_82598AT 0x10C8 +#define IXGBE_DEV_ID_82598AT2 0x150B +#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB +#define IXGBE_DEV_ID_82598EB_CX4 0x10DD +#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC +#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 +#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 +#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 +#define IXGBE_DEV_ID_82599_KX4 0x10F7 +#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 +#define IXGBE_DEV_ID_82599_KR 0x1517 +#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C +#define IXGBE_DEV_ID_82599_CX4 0x10F9 +#define IXGBE_DEV_ID_82599_SFP 0x10FB +#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A +#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 +#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 +#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D +#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 +#define IXGBE_DEV_ID_82599EN_SFP 0x1557 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C +#define IXGBE_DEV_ID_82599_LS 0x154F +#define IXGBE_DEV_ID_X540T 0x1528 + +/* General Registers */ +#define IXGBE_CTRL 0x00000 +#define IXGBE_STATUS 0x00008 +#define IXGBE_CTRL_EXT 0x00018 +#define IXGBE_ESDP 0x00020 +#define IXGBE_EODSDP 0x00028 +#define IXGBE_I2CCTL 0x00028 +#define IXGBE_PHY_GPIO 0x00028 +#define IXGBE_MAC_GPIO 0x00030 +#define IXGBE_PHYINT_STATUS0 0x00100 +#define IXGBE_PHYINT_STATUS1 0x00104 +#define IXGBE_PHYINT_STATUS2 0x00108 +#define IXGBE_LEDCTL 0x00200 +#define IXGBE_FRTIMER 0x00048 +#define IXGBE_TCPTIMER 0x0004C +#define IXGBE_CORESPARE 0x00600 +#define IXGBE_EXVET 0x05078 + +/* NVM Registers */ +#define IXGBE_EEC 0x10010 +#define IXGBE_EERD 0x10014 +#define IXGBE_EEWR 0x10018 +#define IXGBE_FLA 0x1001C +#define IXGBE_EEMNGCTL 0x10110 +#define IXGBE_EEMNGDATA 0x10114 +#define IXGBE_FLMNGCTL 0x10118 +#define IXGBE_FLMNGDATA 0x1011C +#define IXGBE_FLMNGCNT 0x10120 +#define IXGBE_FLOP 0x1013C +#define IXGBE_GRC 0x10200 +#define IXGBE_SRAMREL 0x10210 +#define IXGBE_PHYDBG 0x10218 + +/* General Receive Control */ +#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */ +#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */ + +#define IXGBE_VPDDIAG0 0x10204 +#define IXGBE_VPDDIAG1 0x10208 + +/* I2CCTL Bit Masks */ +#define IXGBE_I2C_CLK_IN 0x00000001 +#define IXGBE_I2C_CLK_OUT 0x00000002 +#define IXGBE_I2C_DATA_IN 0x00000004 +#define IXGBE_I2C_DATA_OUT 0x00000008 +#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500 + +#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define IXGBE_EMC_INTERNAL_DATA 0x00 +#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 +#define IXGBE_EMC_DIODE1_DATA 0x01 +#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 +#define IXGBE_EMC_DIODE2_DATA 0x23 +#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A + +#define IXGBE_MAX_SENSORS 3 + +struct ixgbe_thermal_diode_data { + u8 location; + u8 temp; + u8 caution_thresh; + u8 max_op_thresh; +}; + +struct ixgbe_thermal_sensor_data { + struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS]; +}; + +/* Interrupt Registers */ +#define IXGBE_EICR 0x00800 +#define IXGBE_EICS 0x00808 +#define IXGBE_EIMS 0x00880 +#define IXGBE_EIMC 0x00888 +#define IXGBE_EIAC 0x00810 +#define IXGBE_EIAM 0x00890 +#define IXGBE_EICS_EX(_i) (0x00A90 + (_i) * 4) +#define IXGBE_EIMS_EX(_i) (0x00AA0 + (_i) * 4) +#define IXGBE_EIMC_EX(_i) (0x00AB0 + (_i) * 4) +#define IXGBE_EIAM_EX(_i) (0x00AD0 + (_i) * 4) +/* 82599 EITR is only 12 bits, with the lower 3 always zero */ +/* + * 82598 EITR is 16 bits but set the limits based on the max + * supported by all ixgbe hardware + */ +#define IXGBE_MAX_INT_RATE 488281 +#define IXGBE_MIN_INT_RATE 956 +#define IXGBE_MAX_EITR 0x00000FF8 +#define IXGBE_MIN_EITR 8 +#define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \ + (0x012300 + (((_i) - 24) * 4))) +#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8 +#define IXGBE_EITR_LLI_MOD 0x00008000 +#define IXGBE_EITR_CNT_WDIS 0x80000000 +#define IXGBE_IVAR(_i) (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */ +#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */ +#define IXGBE_EITRSEL 0x00894 +#define IXGBE_MSIXT 0x00000 /* MSI-X Table. 0x0000 - 0x01C */ +#define IXGBE_MSIXPBA 0x02000 /* MSI-X Pending bit array */ +#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4))) +#define IXGBE_GPIE 0x00898 + +/* Flow Control Registers */ +#define IXGBE_FCADBUL 0x03210 +#define IXGBE_FCADBUH 0x03214 +#define IXGBE_FCAMACL 0x04328 +#define IXGBE_FCAMACH 0x0432C +#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_PFCTOP 0x03008 +#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */ +#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */ +#define IXGBE_FCRTV 0x032A0 +#define IXGBE_FCCFG 0x03D00 +#define IXGBE_TFCS 0x0CE00 + +/* Receive DMA Registers */ +#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \ + (0x0D000 + (((_i) - 64) * 0x40))) +#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \ + (0x0D004 + (((_i) - 64) * 0x40))) +#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \ + (0x0D008 + (((_i) - 64) * 0x40))) +#define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \ + (0x0D010 + (((_i) - 64) * 0x40))) +#define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \ + (0x0D018 + (((_i) - 64) * 0x40))) +#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \ + (0x0D028 + (((_i) - 64) * 0x40))) +#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \ + (0x0D02C + (((_i) - 64) * 0x40))) +#define IXGBE_RSCDBU 0x03028 +#define IXGBE_RDDCC 0x02F20 +#define IXGBE_RXMEMWRAP 0x03190 +#define IXGBE_STARCTRL 0x03024 +/* + * Split and Replication Receive Control Registers + * 00-15 : 0x02100 + n*4 + * 16-64 : 0x01014 + n*0x40 + * 64-127: 0x0D014 + (n-64)*0x40 + */ +#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \ + (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \ + (0x0D014 + (((_i) - 64) * 0x40)))) +/* + * Rx DCA Control Register: + * 00-15 : 0x02200 + n*4 + * 16-64 : 0x0100C + n*0x40 + * 64-127: 0x0D00C + (n-64)*0x40 + */ +#define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \ + (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \ + (0x0D00C + (((_i) - 64) * 0x40)))) +#define IXGBE_RDRXCTL 0x02F00 +#define IXGBE_RDRXCTL_RSC_PUSH 0x80 +/* 8 of these 0x03C00 - 0x03C1C */ +#define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4)) +#define IXGBE_RXCTRL 0x03000 +#define IXGBE_DROPEN 0x03D04 +#define IXGBE_RXPBSIZE_SHIFT 10 + +/* Receive Registers */ +#define IXGBE_RXCSUM 0x05000 +#define IXGBE_RFCTL 0x05008 +#define IXGBE_DRECCCTL 0x02F08 +#define IXGBE_DRECCCTL_DISABLE 0 +#define IXGBE_DRECCCTL2 0x02F8C + +/* Multicast Table Array - 128 entries */ +#define IXGBE_MTA(_i) (0x05200 + ((_i) * 4)) +#define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ + (0x0A200 + ((_i) * 8))) +#define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ + (0x0A204 + ((_i) * 8))) +#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8)) +#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8)) +/* Packet split receive type */ +#define IXGBE_PSRTYPE(_i) (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \ + (0x0EA00 + ((_i) * 4))) +/* array of 4096 1-bit vlan filters */ +#define IXGBE_VFTA(_i) (0x0A000 + ((_i) * 4)) +/*array of 4096 4-bit vlan vmdq indices */ +#define IXGBE_VFTAVIND(_j, _i) (0x0A200 + ((_j) * 0x200) + ((_i) * 4)) +#define IXGBE_FCTRL 0x05080 +#define IXGBE_VLNCTRL 0x05088 +#define IXGBE_MCSTCTRL 0x05090 +#define IXGBE_MRQC 0x05818 +#define IXGBE_SAQF(_i) (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */ +#define IXGBE_DAQF(_i) (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */ +#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */ +#define IXGBE_FTQF(_i) (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */ +#define IXGBE_ETQF(_i) (0x05128 + ((_i) * 4)) /* EType Queue Filter */ +#define IXGBE_ETQS(_i) (0x0EC00 + ((_i) * 4)) /* EType Queue Select */ +#define IXGBE_SYNQF 0x0EC30 /* SYN Packet Queue Filter */ +#define IXGBE_RQTC 0x0EC70 +#define IXGBE_MTQC 0x08120 +#define IXGBE_VLVF(_i) (0x0F100 + ((_i) * 4)) /* 64 of these (0-63) */ +#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4)) /* 128 of these (0-127) */ +#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4)) /* 64 of these (0-63) */ +#define IXGBE_VT_CTL 0x051B0 +#define IXGBE_PFMAILBOX(_i) (0x04B00 + (4 * (_i))) /* 64 total */ +/* 64 Mailboxes, 16 DW each */ +#define IXGBE_PFMBMEM(_i) (0x13000 + (64 * (_i))) +#define IXGBE_PFMBICR(_i) (0x00710 + (4 * (_i))) /* 4 total */ +#define IXGBE_PFMBIMR(_i) (0x00720 + (4 * (_i))) /* 4 total */ +#define IXGBE_VFRE(_i) (0x051E0 + ((_i) * 4)) +#define IXGBE_VFTE(_i) (0x08110 + ((_i) * 4)) +#define IXGBE_VMECM(_i) (0x08790 + ((_i) * 4)) +#define IXGBE_QDE 0x2F04 +#define IXGBE_VMTXSW(_i) (0x05180 + ((_i) * 4)) /* 2 total */ +#define IXGBE_VMOLR(_i) (0x0F000 + ((_i) * 4)) /* 64 total */ +#define IXGBE_UTA(_i) (0x0F400 + ((_i) * 4)) +#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4)) +#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4)) +#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4)) +#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/ +#define IXGBE_RXFECCERR0 0x051B8 +#define IXGBE_LLITHRESH 0x0EC90 +#define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_IMIRVP 0x05AC0 +#define IXGBE_VMD_CTL 0x0581C +#define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */ +#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */ + +/* Flow Director registers */ +#define IXGBE_FDIRCTRL 0x0EE00 +#define IXGBE_FDIRHKEY 0x0EE68 +#define IXGBE_FDIRSKEY 0x0EE6C +#define IXGBE_FDIRDIP4M 0x0EE3C +#define IXGBE_FDIRSIP4M 0x0EE40 +#define IXGBE_FDIRTCPM 0x0EE44 +#define IXGBE_FDIRUDPM 0x0EE48 +#define IXGBE_FDIRIP6M 0x0EE74 +#define IXGBE_FDIRM 0x0EE70 + +/* Flow Director Stats registers */ +#define IXGBE_FDIRFREE 0x0EE38 +#define IXGBE_FDIRLEN 0x0EE4C +#define IXGBE_FDIRUSTAT 0x0EE50 +#define IXGBE_FDIRFSTAT 0x0EE54 +#define IXGBE_FDIRMATCH 0x0EE58 +#define IXGBE_FDIRMISS 0x0EE5C + +/* Flow Director Programming registers */ +#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */ +#define IXGBE_FDIRIPSA 0x0EE18 +#define IXGBE_FDIRIPDA 0x0EE1C +#define IXGBE_FDIRPORT 0x0EE20 +#define IXGBE_FDIRVLAN 0x0EE24 +#define IXGBE_FDIRHASH 0x0EE28 +#define IXGBE_FDIRCMD 0x0EE2C + +/* Transmit DMA registers */ +#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/ +#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40)) +#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40)) +#define IXGBE_TDH(_i) (0x06010 + ((_i) * 0x40)) +#define IXGBE_TDT(_i) (0x06018 + ((_i) * 0x40)) +#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40)) +#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40)) +#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40)) +#define IXGBE_DTXCTL 0x07E00 + +#define IXGBE_DMATXCTL 0x04A80 +#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */ +#define IXGBE_PFDTXGSWC 0x08220 +#define IXGBE_DTXMXSZRQ 0x08100 +#define IXGBE_DTXTCPFLGL 0x04A88 +#define IXGBE_DTXTCPFLGH 0x04A8C +#define IXGBE_LBDRPEN 0x0CA00 +#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */ + +#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */ +#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */ +#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */ +#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */ + +#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */ + +/* Anti-spoofing defines */ +#define IXGBE_SPOOF_MACAS_MASK 0xFF +#define IXGBE_SPOOF_VLANAS_MASK 0xFF00 +#define IXGBE_SPOOF_VLANAS_SHIFT 8 +#define IXGBE_PFVFSPOOF_REG_COUNT 8 +/* 16 of these (0-15) */ +#define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4)) +/* Tx DCA Control register : 128 of these (0-127) */ +#define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40)) +#define IXGBE_TIPG 0x0CB00 +#define IXGBE_TXPBSIZE(_i) (0x0CC00 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_MNGTXMAP 0x0CD10 +#define IXGBE_TIPG_FIBER_DEFAULT 3 +#define IXGBE_TXPBSIZE_SHIFT 10 + +/* Wake up registers */ +#define IXGBE_WUC 0x05800 +#define IXGBE_WUFC 0x05808 +#define IXGBE_WUS 0x05810 +#define IXGBE_IPAV 0x05838 +#define IXGBE_IP4AT 0x05840 /* IPv4 table 0x5840-0x5858 */ +#define IXGBE_IP6AT 0x05880 /* IPv6 table 0x5880-0x588F */ + +#define IXGBE_WUPL 0x05900 +#define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */ +#define IXGBE_FHFT(_n) (0x09000 + (_n * 0x100)) /* Flex host filter table */ +/* Ext Flexible Host Filter Table */ +#define IXGBE_FHFT_EXT(_n) (0x09800 + (_n * 0x100)) + +#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 +#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 + +/* Each Flexible Filter is at most 128 (0x80) bytes in length */ +#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX 128 +#define IXGBE_FHFT_LENGTH_OFFSET 0xFC /* Length byte in FHFT */ +#define IXGBE_FHFT_LENGTH_MASK 0x0FF /* Length in lower byte */ + +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */ +#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */ +#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N pin assertion */ + +/* Wake Up Filter Control */ +#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IXGBE_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define IXGBE_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define IXGBE_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define IXGBE_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IXGBE_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ +#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ +#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ +#define IXGBE_WUFC_MNG 0x00000100 /* Directed Mgmt Packet Wakeup Enable */ + +#define IXGBE_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ +#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ +#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ +#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ +#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ +#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */ +#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */ +#define IXGBE_WUFC_FLX_FILTERS 0x000F0000 /* Mask for 4 flex filters */ +/* Mask for Ext. flex filters */ +#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000 +#define IXGBE_WUFC_ALL_FILTERS 0x003F00FF /* Mask for all wakeup filters */ +#define IXGBE_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ + +/* Wake Up Status */ +#define IXGBE_WUS_LNKC IXGBE_WUFC_LNKC +#define IXGBE_WUS_MAG IXGBE_WUFC_MAG +#define IXGBE_WUS_EX IXGBE_WUFC_EX +#define IXGBE_WUS_MC IXGBE_WUFC_MC +#define IXGBE_WUS_BC IXGBE_WUFC_BC +#define IXGBE_WUS_ARP IXGBE_WUFC_ARP +#define IXGBE_WUS_IPV4 IXGBE_WUFC_IPV4 +#define IXGBE_WUS_IPV6 IXGBE_WUFC_IPV6 +#define IXGBE_WUS_MNG IXGBE_WUFC_MNG +#define IXGBE_WUS_FLX0 IXGBE_WUFC_FLX0 +#define IXGBE_WUS_FLX1 IXGBE_WUFC_FLX1 +#define IXGBE_WUS_FLX2 IXGBE_WUFC_FLX2 +#define IXGBE_WUS_FLX3 IXGBE_WUFC_FLX3 +#define IXGBE_WUS_FLX4 IXGBE_WUFC_FLX4 +#define IXGBE_WUS_FLX5 IXGBE_WUFC_FLX5 +#define IXGBE_WUS_FLX_FILTERS IXGBE_WUFC_FLX_FILTERS + +/* Wake Up Packet Length */ +#define IXGBE_WUPL_LENGTH_MASK 0xFFFF + +/* DCB registers */ +#define IXGBE_DCB_MAX_TRAFFIC_CLASS 8 +#define IXGBE_RMCS 0x03D00 +#define IXGBE_DPMCS 0x07F40 +#define IXGBE_PDPMCS 0x0CD00 +#define IXGBE_RUPPBMR 0x050A0 +#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TDTQ2TCCR(_i) (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */ +#define IXGBE_TDTQ2TCSR(_i) (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */ +#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ + + +/* Security Control Registers */ +#define IXGBE_SECTXCTRL 0x08800 +#define IXGBE_SECTXSTAT 0x08804 +#define IXGBE_SECTXBUFFAF 0x08808 +#define IXGBE_SECTXMINIFG 0x08810 +#define IXGBE_SECRXCTRL 0x08D00 +#define IXGBE_SECRXSTAT 0x08D04 + +/* Security Bit Fields and Masks */ +#define IXGBE_SECTXCTRL_SECTX_DIS 0x00000001 +#define IXGBE_SECTXCTRL_TX_DIS 0x00000002 +#define IXGBE_SECTXCTRL_STORE_FORWARD 0x00000004 + +#define IXGBE_SECTXSTAT_SECTX_RDY 0x00000001 +#define IXGBE_SECTXSTAT_ECC_TXERR 0x00000002 + +#define IXGBE_SECRXCTRL_SECRX_DIS 0x00000001 +#define IXGBE_SECRXCTRL_RX_DIS 0x00000002 + +#define IXGBE_SECRXSTAT_SECRX_RDY 0x00000001 +#define IXGBE_SECRXSTAT_ECC_RXERR 0x00000002 + +/* LinkSec (MacSec) Registers */ +#define IXGBE_LSECTXCAP 0x08A00 +#define IXGBE_LSECRXCAP 0x08F00 +#define IXGBE_LSECTXCTRL 0x08A04 +#define IXGBE_LSECTXSCL 0x08A08 /* SCI Low */ +#define IXGBE_LSECTXSCH 0x08A0C /* SCI High */ +#define IXGBE_LSECTXSA 0x08A10 +#define IXGBE_LSECTXPN0 0x08A14 +#define IXGBE_LSECTXPN1 0x08A18 +#define IXGBE_LSECTXKEY0(_n) (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */ +#define IXGBE_LSECTXKEY1(_n) (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */ +#define IXGBE_LSECRXCTRL 0x08F04 +#define IXGBE_LSECRXSCL 0x08F08 +#define IXGBE_LSECRXSCH 0x08F0C +#define IXGBE_LSECRXSA(_i) (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */ +#define IXGBE_LSECRXPN(_i) (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */ +#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m)))) +#define IXGBE_LSECTXUT 0x08A3C /* OutPktsUntagged */ +#define IXGBE_LSECTXPKTE 0x08A40 /* OutPktsEncrypted */ +#define IXGBE_LSECTXPKTP 0x08A44 /* OutPktsProtected */ +#define IXGBE_LSECTXOCTE 0x08A48 /* OutOctetsEncrypted */ +#define IXGBE_LSECTXOCTP 0x08A4C /* OutOctetsProtected */ +#define IXGBE_LSECRXUT 0x08F40 /* InPktsUntagged/InPktsNoTag */ +#define IXGBE_LSECRXOCTD 0x08F44 /* InOctetsDecrypted */ +#define IXGBE_LSECRXOCTV 0x08F48 /* InOctetsValidated */ +#define IXGBE_LSECRXBAD 0x08F4C /* InPktsBadTag */ +#define IXGBE_LSECRXNOSCI 0x08F50 /* InPktsNoSci */ +#define IXGBE_LSECRXUNSCI 0x08F54 /* InPktsUnknownSci */ +#define IXGBE_LSECRXUNCH 0x08F58 /* InPktsUnchecked */ +#define IXGBE_LSECRXDELAY 0x08F5C /* InPktsDelayed */ +#define IXGBE_LSECRXLATE 0x08F60 /* InPktsLate */ +#define IXGBE_LSECRXOK(_n) (0x08F64 + (0x04 * (_n))) /* InPktsOk */ +#define IXGBE_LSECRXINV(_n) (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */ +#define IXGBE_LSECRXNV(_n) (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */ +#define IXGBE_LSECRXUNSA 0x08F7C /* InPktsUnusedSa */ +#define IXGBE_LSECRXNUSA 0x08F80 /* InPktsNotUsingSa */ + +/* LinkSec (MacSec) Bit Fields and Masks */ +#define IXGBE_LSECTXCAP_SUM_MASK 0x00FF0000 +#define IXGBE_LSECTXCAP_SUM_SHIFT 16 +#define IXGBE_LSECRXCAP_SUM_MASK 0x00FF0000 +#define IXGBE_LSECRXCAP_SUM_SHIFT 16 + +#define IXGBE_LSECTXCTRL_EN_MASK 0x00000003 +#define IXGBE_LSECTXCTRL_DISABLE 0x0 +#define IXGBE_LSECTXCTRL_AUTH 0x1 +#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT 0x2 +#define IXGBE_LSECTXCTRL_AISCI 0x00000020 +#define IXGBE_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 +#define IXGBE_LSECTXCTRL_RSV_MASK 0x000000D8 + +#define IXGBE_LSECRXCTRL_EN_MASK 0x0000000C +#define IXGBE_LSECRXCTRL_EN_SHIFT 2 +#define IXGBE_LSECRXCTRL_DISABLE 0x0 +#define IXGBE_LSECRXCTRL_CHECK 0x1 +#define IXGBE_LSECRXCTRL_STRICT 0x2 +#define IXGBE_LSECRXCTRL_DROP 0x3 +#define IXGBE_LSECRXCTRL_PLSH 0x00000040 +#define IXGBE_LSECRXCTRL_RP 0x00000080 +#define IXGBE_LSECRXCTRL_RSV_MASK 0xFFFFFF33 + +/* IpSec Registers */ +#define IXGBE_IPSTXIDX 0x08900 +#define IXGBE_IPSTXSALT 0x08904 +#define IXGBE_IPSTXKEY(_i) (0x08908 + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXIDX 0x08E00 +#define IXGBE_IPSRXIPADDR(_i) (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXSPI 0x08E14 +#define IXGBE_IPSRXIPIDX 0x08E18 +#define IXGBE_IPSRXKEY(_i) (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXSALT 0x08E2C +#define IXGBE_IPSRXMOD 0x08E30 + +#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE 0x4 + +/* DCB registers */ +#define IXGBE_RTRPCS 0x02430 +#define IXGBE_RTTDCS 0x04900 +#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */ +#define IXGBE_RTTPCS 0x0CD00 +#define IXGBE_RTRUP2TC 0x03020 +#define IXGBE_RTTUP2TC 0x0C800 +#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TXLLQ(_i) (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDQSEL 0x04904 +#define IXGBE_RTTDT1C 0x04908 +#define IXGBE_RTTDT1S 0x0490C +#define IXGBE_RTTDTECC 0x04990 +#define IXGBE_RTTDTECC_NO_BCN 0x00000100 + +#define IXGBE_RTTBCNRC 0x04984 +#define IXGBE_RTTBCNRC_RS_ENA 0x80000000 +#define IXGBE_RTTBCNRC_RF_DEC_MASK 0x00003FFF +#define IXGBE_RTTBCNRC_RF_INT_SHIFT 14 +#define IXGBE_RTTBCNRC_RF_INT_MASK \ + (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT) +#define IXGBE_RTTBCNRM 0x04980 + +/* FCoE DMA Context Registers */ +#define IXGBE_FCPTRL 0x02410 /* FC User Desc. PTR Low */ +#define IXGBE_FCPTRH 0x02414 /* FC USer Desc. PTR High */ +#define IXGBE_FCBUFF 0x02418 /* FC Buffer Control */ +#define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */ +#define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0*/ +#define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4)) +#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */ +#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */ +#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */ +#define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */ +#define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */ +#define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3 +#define IXGBE_FCBUFF_BUFFCNT_SHIFT 8 +#define IXGBE_FCBUFF_OFFSET_SHIFT 16 +#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */ +#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */ +#define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */ +#define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */ +#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16 +/* FCoE SOF/EOF */ +#define IXGBE_TEOFF 0x04A94 /* Tx FC EOF */ +#define IXGBE_TSOFF 0x04A98 /* Tx FC SOF */ +#define IXGBE_REOFF 0x05158 /* Rx FC EOF */ +#define IXGBE_RSOFF 0x051F8 /* Rx FC SOF */ +/* FCoE Filter Context Registers */ +#define IXGBE_FCFLT 0x05108 /* FC FLT Context */ +#define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */ +#define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */ +#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */ +#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */ +#define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */ +#define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */ +#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */ +#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */ +#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */ +/* FCoE Receive Control */ +#define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */ +#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */ +#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */ +#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */ +#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */ +#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */ +#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. Header */ +#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */ +#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */ +#define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */ +#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8 +/* FCoE Redirection */ +#define IXGBE_FCRECTL 0x0ED00 /* FC Redirection Control */ +#define IXGBE_FCRETA0 0x0ED10 /* FC Redirection Table 0 */ +#define IXGBE_FCRETA(_i) (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */ +#define IXGBE_FCRECTL_ENA 0x1 /* FCoE Redir Table Enable */ +#define IXGBE_FCRETASEL_ENA 0x2 /* FCoE FCRETASEL bit */ +#define IXGBE_FCRETA_SIZE 8 /* Max entries in FCRETA */ +#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */ + +/* Stats registers */ +#define IXGBE_CRCERRS 0x04000 +#define IXGBE_ILLERRC 0x04004 +#define IXGBE_ERRBC 0x04008 +#define IXGBE_MSPDC 0x04010 +#define IXGBE_MPC(_i) (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/ +#define IXGBE_MLFC 0x04034 +#define IXGBE_MRFC 0x04038 +#define IXGBE_RLEC 0x04040 +#define IXGBE_LXONTXC 0x03F60 +#define IXGBE_LXONRXC 0x0CF60 +#define IXGBE_LXOFFTXC 0x03F68 +#define IXGBE_LXOFFRXC 0x0CF68 +#define IXGBE_LXONRXCNT 0x041A4 +#define IXGBE_LXOFFRXCNT 0x041A8 +#define IXGBE_PXONRXCNT(_i) (0x04140 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXOFFRXCNT(_i) (0x04160 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXON2OFFCNT(_i) (0x03240 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXONTXC(_i) (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/ +#define IXGBE_PXONRXC(_i) (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/ +#define IXGBE_PXOFFTXC(_i) (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/ +#define IXGBE_PXOFFRXC(_i) (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/ +#define IXGBE_PRC64 0x0405C +#define IXGBE_PRC127 0x04060 +#define IXGBE_PRC255 0x04064 +#define IXGBE_PRC511 0x04068 +#define IXGBE_PRC1023 0x0406C +#define IXGBE_PRC1522 0x04070 +#define IXGBE_GPRC 0x04074 +#define IXGBE_BPRC 0x04078 +#define IXGBE_MPRC 0x0407C +#define IXGBE_GPTC 0x04080 +#define IXGBE_GORCL 0x04088 +#define IXGBE_GORCH 0x0408C +#define IXGBE_GOTCL 0x04090 +#define IXGBE_GOTCH 0x04094 +#define IXGBE_RNBC(_i) (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/ +#define IXGBE_RUC 0x040A4 +#define IXGBE_RFC 0x040A8 +#define IXGBE_ROC 0x040AC +#define IXGBE_RJC 0x040B0 +#define IXGBE_MNGPRC 0x040B4 +#define IXGBE_MNGPDC 0x040B8 +#define IXGBE_MNGPTC 0x0CF90 +#define IXGBE_TORL 0x040C0 +#define IXGBE_TORH 0x040C4 +#define IXGBE_TPR 0x040D0 +#define IXGBE_TPT 0x040D4 +#define IXGBE_PTC64 0x040D8 +#define IXGBE_PTC127 0x040DC +#define IXGBE_PTC255 0x040E0 +#define IXGBE_PTC511 0x040E4 +#define IXGBE_PTC1023 0x040E8 +#define IXGBE_PTC1522 0x040EC +#define IXGBE_MPTC 0x040F0 +#define IXGBE_BPTC 0x040F4 +#define IXGBE_XEC 0x04120 +#define IXGBE_SSVPC 0x08780 + +#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4)) +#define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \ + (0x08600 + ((_i) * 4))) +#define IXGBE_TQSM(_i) (0x08600 + ((_i) * 4)) + +#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */ +#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */ +#define IXGBE_FCCRC 0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */ +#define IXGBE_FCOERPDC 0x0241C /* FCoE Rx Packets Dropped Count */ +#define IXGBE_FCLAST 0x02424 /* FCoE Last Error Count */ +#define IXGBE_FCOEPRC 0x02428 /* Number of FCoE Packets Received */ +#define IXGBE_FCOEDWRC 0x0242C /* Number of FCoE DWords Received */ +#define IXGBE_FCOEPTC 0x08784 /* Number of FCoE Packets Transmitted */ +#define IXGBE_FCOEDWTC 0x08788 /* Number of FCoE DWords Transmitted */ +#define IXGBE_FCCRC_CNT_MASK 0x0000FFFF /* CRC_CNT: bit 0 - 15 */ +#define IXGBE_FCLAST_CNT_MASK 0x0000FFFF /* Last_CNT: bit 0 - 15 */ +#define IXGBE_O2BGPTC 0x041C4 +#define IXGBE_O2BSPC 0x087B0 +#define IXGBE_B2OSPC 0x041C0 +#define IXGBE_B2OGPRC 0x02F90 +#define IXGBE_BUPRC 0x04180 +#define IXGBE_BMPRC 0x04184 +#define IXGBE_BBPRC 0x04188 +#define IXGBE_BUPTC 0x0418C +#define IXGBE_BMPTC 0x04190 +#define IXGBE_BBPTC 0x04194 +#define IXGBE_BCRCERRS 0x04198 +#define IXGBE_BXONRXC 0x0419C +#define IXGBE_BXOFFRXC 0x041E0 +#define IXGBE_BXONTXC 0x041E4 +#define IXGBE_BXOFFTXC 0x041E8 +#define IXGBE_PCRC8ECL 0x0E810 +#define IXGBE_PCRC8ECH 0x0E811 +#define IXGBE_PCRC8ECH_MASK 0x1F +#define IXGBE_LDPCECL 0x0E820 +#define IXGBE_LDPCECH 0x0E821 + +/* Management */ +#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MANC 0x05820 +#define IXGBE_MFVAL 0x05824 +#define IXGBE_MANC2H 0x05860 +#define IXGBE_MDEF(_i) (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MIPAF 0x058B0 +#define IXGBE_MMAL(_i) (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */ +#define IXGBE_MMAH(_i) (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */ +#define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */ +#define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_LSWFW 0x15014 +#define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */ +#define IXGBE_BMCIPVAL 0x05060 +#define IXGBE_BMCIP_IPADDR_TYPE 0x00000001 +#define IXGBE_BMCIP_IPADDR_VALID 0x00000002 + +/* Management Bit Fields and Masks */ +#define IXGBE_MANC_EN_BMC2OS 0x10000000 /* Ena BMC2OS and OS2BMC traffic */ +#define IXGBE_MANC_EN_BMC2OS_SHIFT 28 + +/* Firmware Semaphore Register */ +#define IXGBE_FWSM_MODE_MASK 0xE + +/* ARC Subsystem registers */ +#define IXGBE_HICR 0x15F00 +#define IXGBE_FWSTS 0x15F0C +#define IXGBE_HSMC0R 0x15F04 +#define IXGBE_HSMC1R 0x15F08 +#define IXGBE_SWSR 0x15F10 +#define IXGBE_HFDR 0x15FE8 +#define IXGBE_FLEX_MNG 0x15800 /* 0x15800 - 0x15EFC */ + +#define IXGBE_HICR_EN 0x01 /* Enable bit - RO */ +/* Driver sets this bit when done to put command in RAM */ +#define IXGBE_HICR_C 0x02 +#define IXGBE_HICR_SV 0x04 /* Status Validity */ +#define IXGBE_HICR_FW_RESET_ENABLE 0x40 +#define IXGBE_HICR_FW_RESET 0x80 + +/* PCI-E registers */ +#define IXGBE_GCR 0x11000 +#define IXGBE_GTV 0x11004 +#define IXGBE_FUNCTAG 0x11008 +#define IXGBE_GLT 0x1100C +#define IXGBE_PCIEPIPEADR 0x11004 +#define IXGBE_PCIEPIPEDAT 0x11008 +#define IXGBE_GSCL_1 0x11010 +#define IXGBE_GSCL_2 0x11014 +#define IXGBE_GSCL_3 0x11018 +#define IXGBE_GSCL_4 0x1101C +#define IXGBE_GSCN_0 0x11020 +#define IXGBE_GSCN_1 0x11024 +#define IXGBE_GSCN_2 0x11028 +#define IXGBE_GSCN_3 0x1102C +#define IXGBE_FACTPS 0x10150 +#define IXGBE_PCIEANACTL 0x11040 +#define IXGBE_SWSM 0x10140 +#define IXGBE_FWSM 0x10148 +#define IXGBE_GSSR 0x10160 +#define IXGBE_MREVID 0x11064 +#define IXGBE_DCA_ID 0x11070 +#define IXGBE_DCA_CTRL 0x11074 +#define IXGBE_SWFW_SYNC IXGBE_GSSR + +/* PCI-E registers 82599-Specific */ +#define IXGBE_GCR_EXT 0x11050 +#define IXGBE_GSCL_5_82599 0x11030 +#define IXGBE_GSCL_6_82599 0x11034 +#define IXGBE_GSCL_7_82599 0x11038 +#define IXGBE_GSCL_8_82599 0x1103C +#define IXGBE_PHYADR_82599 0x11040 +#define IXGBE_PHYDAT_82599 0x11044 +#define IXGBE_PHYCTL_82599 0x11048 +#define IXGBE_PBACLR_82599 0x11068 +#define IXGBE_CIAA_82599 0x11088 +#define IXGBE_CIAD_82599 0x1108C +#define IXGBE_PICAUSE 0x110B0 +#define IXGBE_PIENA 0x110B8 +#define IXGBE_CDQ_MBR_82599 0x110B4 +#define IXGBE_PCIESPARE 0x110BC +#define IXGBE_MISC_REG_82599 0x110F0 +#define IXGBE_ECC_CTRL_0_82599 0x11100 +#define IXGBE_ECC_CTRL_1_82599 0x11104 +#define IXGBE_ECC_STATUS_82599 0x110E0 +#define IXGBE_BAR_CTRL_82599 0x110F4 + +/* PCI Express Control */ +#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 +#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define IXGBE_GCR_CAP_VER2 0x00040000 + +#define IXGBE_GCR_EXT_MSIX_EN 0x80000000 +#define IXGBE_GCR_EXT_BUFFERS_CLEAR 0x40000000 +#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001 +#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002 +#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003 +#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ + IXGBE_GCR_EXT_VT_MODE_64) +/* Time Sync Registers */ +#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ +#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ +#define IXGBE_RXSTMPL 0x051E8 /* Rx timestamp Low - RO */ +#define IXGBE_RXSTMPH 0x051A4 /* Rx timestamp High - RO */ +#define IXGBE_RXSATRL 0x051A0 /* Rx timestamp attribute low - RO */ +#define IXGBE_RXSATRH 0x051A8 /* Rx timestamp attribute high - RO */ +#define IXGBE_RXMTRL 0x05120 /* RX message type register low - RW */ +#define IXGBE_TXSTMPL 0x08C04 /* Tx timestamp value Low - RO */ +#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ +#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ +#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ +#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ +#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ +#define IXGBE_TSAUXC 0x08C20 /* TimeSync Auxiliary Control register - RW */ +#define IXGBE_TRGTTIML0 0x08C24 /* Target Time Register 0 Low - RW */ +#define IXGBE_TRGTTIMH0 0x08C28 /* Target Time Register 0 High - RW */ +#define IXGBE_TRGTTIML1 0x08C2C /* Target Time Register 1 Low - RW */ +#define IXGBE_TRGTTIMH1 0x08C30 /* Target Time Register 1 High - RW */ +#define IXGBE_FREQOUT0 0x08C34 /* Frequency Out 0 Control register - RW */ +#define IXGBE_FREQOUT1 0x08C38 /* Frequency Out 1 Control register - RW */ +#define IXGBE_AUXSTMPL0 0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */ +#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ +#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ +#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ + +/* Diagnostic Registers */ +#define IXGBE_RDSTATCTL 0x02C20 +#define IXGBE_RDSTAT(_i) (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */ +#define IXGBE_RDHMPN 0x02F08 +#define IXGBE_RIC_DW(_i) (0x02F10 + ((_i) * 4)) +#define IXGBE_RDPROBE 0x02F20 +#define IXGBE_RDMAM 0x02F30 +#define IXGBE_RDMAD 0x02F34 +#define IXGBE_TDSTATCTL 0x07C20 +#define IXGBE_TDSTAT(_i) (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */ +#define IXGBE_TDHMPN 0x07F08 +#define IXGBE_TDHMPN2 0x082FC +#define IXGBE_TXDESCIC 0x082CC +#define IXGBE_TIC_DW(_i) (0x07F10 + ((_i) * 4)) +#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4)) +#define IXGBE_TDPROBE 0x07F20 +#define IXGBE_TXBUFCTRL 0x0C600 +#define IXGBE_TXBUFDATA0 0x0C610 +#define IXGBE_TXBUFDATA1 0x0C614 +#define IXGBE_TXBUFDATA2 0x0C618 +#define IXGBE_TXBUFDATA3 0x0C61C +#define IXGBE_RXBUFCTRL 0x03600 +#define IXGBE_RXBUFDATA0 0x03610 +#define IXGBE_RXBUFDATA1 0x03614 +#define IXGBE_RXBUFDATA2 0x03618 +#define IXGBE_RXBUFDATA3 0x0361C +#define IXGBE_PCIE_DIAG(_i) (0x11090 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_RFVAL 0x050A4 +#define IXGBE_MDFTC1 0x042B8 +#define IXGBE_MDFTC2 0x042C0 +#define IXGBE_MDFTFIFO1 0x042C4 +#define IXGBE_MDFTFIFO2 0x042C8 +#define IXGBE_MDFTS 0x042CC +#define IXGBE_RXDATAWRPTR(_i) (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/ +#define IXGBE_RXDESCWRPTR(_i) (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/ +#define IXGBE_RXDATARDPTR(_i) (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/ +#define IXGBE_RXDESCRDPTR(_i) (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/ +#define IXGBE_TXDATAWRPTR(_i) (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/ +#define IXGBE_TXDESCWRPTR(_i) (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/ +#define IXGBE_TXDATARDPTR(_i) (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/ +#define IXGBE_TXDESCRDPTR(_i) (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/ +#define IXGBE_PCIEECCCTL 0x1106C +#define IXGBE_RXWRPTR(_i) (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/ +#define IXGBE_RXUSED(_i) (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/ +#define IXGBE_RXRDPTR(_i) (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/ +#define IXGBE_RXRDWRPTR(_i) (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/ +#define IXGBE_TXWRPTR(_i) (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/ +#define IXGBE_TXUSED(_i) (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/ +#define IXGBE_TXRDPTR(_i) (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/ +#define IXGBE_TXRDWRPTR(_i) (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/ +#define IXGBE_PCIEECCCTL0 0x11100 +#define IXGBE_PCIEECCCTL1 0x11104 +#define IXGBE_RXDBUECC 0x03F70 +#define IXGBE_TXDBUECC 0x0CF70 +#define IXGBE_RXDBUEST 0x03F74 +#define IXGBE_TXDBUEST 0x0CF74 +#define IXGBE_PBTXECC 0x0C300 +#define IXGBE_PBRXECC 0x03300 +#define IXGBE_GHECCR 0x110B0 + +/* MAC Registers */ +#define IXGBE_PCS1GCFIG 0x04200 +#define IXGBE_PCS1GLCTL 0x04208 +#define IXGBE_PCS1GLSTA 0x0420C +#define IXGBE_PCS1GDBG0 0x04210 +#define IXGBE_PCS1GDBG1 0x04214 +#define IXGBE_PCS1GANA 0x04218 +#define IXGBE_PCS1GANLP 0x0421C +#define IXGBE_PCS1GANNP 0x04220 +#define IXGBE_PCS1GANLPNP 0x04224 +#define IXGBE_HLREG0 0x04240 +#define IXGBE_HLREG1 0x04244 +#define IXGBE_PAP 0x04248 +#define IXGBE_MACA 0x0424C +#define IXGBE_APAE 0x04250 +#define IXGBE_ARD 0x04254 +#define IXGBE_AIS 0x04258 +#define IXGBE_MSCA 0x0425C +#define IXGBE_MSRWD 0x04260 +#define IXGBE_MLADD 0x04264 +#define IXGBE_MHADD 0x04268 +#define IXGBE_MAXFRS 0x04268 +#define IXGBE_TREG 0x0426C +#define IXGBE_PCSS1 0x04288 +#define IXGBE_PCSS2 0x0428C +#define IXGBE_XPCSS 0x04290 +#define IXGBE_MFLCN 0x04294 +#define IXGBE_SERDESC 0x04298 +#define IXGBE_MACS 0x0429C +#define IXGBE_AUTOC 0x042A0 +#define IXGBE_LINKS 0x042A4 +#define IXGBE_LINKS2 0x04324 +#define IXGBE_AUTOC2 0x042A8 +#define IXGBE_AUTOC3 0x042AC +#define IXGBE_ANLP1 0x042B0 +#define IXGBE_ANLP2 0x042B4 +#define IXGBE_MACC 0x04330 +#define IXGBE_ATLASCTL 0x04800 +#define IXGBE_MMNGC 0x042D0 +#define IXGBE_ANLPNP1 0x042D4 +#define IXGBE_ANLPNP2 0x042D8 +#define IXGBE_KRPCSFC 0x042E0 +#define IXGBE_KRPCSS 0x042E4 +#define IXGBE_FECS1 0x042E8 +#define IXGBE_FECS2 0x042EC +#define IXGBE_SMADARCTL 0x14F10 +#define IXGBE_MPVC 0x04318 +#define IXGBE_SGMIIC 0x04314 + +/* Statistics Registers */ +#define IXGBE_RXNFGPC 0x041B0 +#define IXGBE_RXNFGBCL 0x041B4 +#define IXGBE_RXNFGBCH 0x041B8 +#define IXGBE_RXDGPC 0x02F50 +#define IXGBE_RXDGBCL 0x02F54 +#define IXGBE_RXDGBCH 0x02F58 +#define IXGBE_RXDDGPC 0x02F5C +#define IXGBE_RXDDGBCL 0x02F60 +#define IXGBE_RXDDGBCH 0x02F64 +#define IXGBE_RXLPBKGPC 0x02F68 +#define IXGBE_RXLPBKGBCL 0x02F6C +#define IXGBE_RXLPBKGBCH 0x02F70 +#define IXGBE_RXDLPBKGPC 0x02F74 +#define IXGBE_RXDLPBKGBCL 0x02F78 +#define IXGBE_RXDLPBKGBCH 0x02F7C +#define IXGBE_TXDGPC 0x087A0 +#define IXGBE_TXDGBCL 0x087A4 +#define IXGBE_TXDGBCH 0x087A8 + +#define IXGBE_RXDSTATCTRL 0x02F40 + +/* Copper Pond 2 link timeout */ +#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50 + +/* Omer CORECTL */ +#define IXGBE_CORECTL 0x014F00 +/* BARCTRL */ +#define IXGBE_BARCTRL 0x110F4 +#define IXGBE_BARCTRL_FLSIZE 0x0700 +#define IXGBE_BARCTRL_FLSIZE_SHIFT 8 +#define IXGBE_BARCTRL_CSRSIZE 0x2000 + +/* RSCCTL Bit Masks */ +#define IXGBE_RSCCTL_RSCEN 0x01 +#define IXGBE_RSCCTL_MAXDESC_1 0x00 +#define IXGBE_RSCCTL_MAXDESC_4 0x04 +#define IXGBE_RSCCTL_MAXDESC_8 0x08 +#define IXGBE_RSCCTL_MAXDESC_16 0x0C + +/* RSCDBU Bit Masks */ +#define IXGBE_RSCDBU_RSCSMALDIS_MASK 0x0000007F +#define IXGBE_RSCDBU_RSCACKDIS 0x00000080 + +/* RDRXCTL Bit Masks */ +#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 /* Rx Desc Min THLD Size */ +#define IXGBE_RDRXCTL_CRCSTRIP 0x00000002 /* CRC Strip */ +#define IXGBE_RDRXCTL_MVMEN 0x00000020 +#define IXGBE_RDRXCTL_DMAIDONE 0x00000008 /* DMA init cycle done */ +#define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */ +#define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */ +#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disabl RSC compl on LLI */ +#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC ena */ +#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC ena */ + +/* RQTC Bit Masks and Shifts */ +#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4) +#define IXGBE_RQTC_TC0_MASK (0x7 << 0) +#define IXGBE_RQTC_TC1_MASK (0x7 << 4) +#define IXGBE_RQTC_TC2_MASK (0x7 << 8) +#define IXGBE_RQTC_TC3_MASK (0x7 << 12) +#define IXGBE_RQTC_TC4_MASK (0x7 << 16) +#define IXGBE_RQTC_TC5_MASK (0x7 << 20) +#define IXGBE_RQTC_TC6_MASK (0x7 << 24) +#define IXGBE_RQTC_TC7_MASK (0x7 << 28) + +/* PSRTYPE.RQPL Bit masks and shift */ +#define IXGBE_PSRTYPE_RQPL_MASK 0x7 +#define IXGBE_PSRTYPE_RQPL_SHIFT 29 + +/* CTRL Bit Masks */ +#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */ +#define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */ +#define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */ +#define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) + +/* FACTPS */ +#define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */ + +/* MHADD Bit Masks */ +#define IXGBE_MHADD_MFS_MASK 0xFFFF0000 +#define IXGBE_MHADD_MFS_SHIFT 16 + +/* Extended Device Control */ +#define IXGBE_CTRL_EXT_PFRSTD 0x00004000 /* Physical Function Reset Done */ +#define IXGBE_CTRL_EXT_NS_DIS 0x00010000 /* No Snoop disable */ +#define IXGBE_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ +#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ + +/* Direct Cache Access (DCA) definitions */ +#define IXGBE_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ +#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ + +#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ +#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ + +#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ +#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */ +#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */ +#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */ +#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */ +#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */ +#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */ +#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ +#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ + +#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ +#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */ +#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */ +#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ +#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ +#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ +#define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */ + +/* MSCA Bit Masks */ +#define IXGBE_MSCA_NP_ADDR_MASK 0x0000FFFF /* MDI Addr (new prot) */ +#define IXGBE_MSCA_NP_ADDR_SHIFT 0 +#define IXGBE_MSCA_DEV_TYPE_MASK 0x001F0000 /* Dev Type (new prot) */ +#define IXGBE_MSCA_DEV_TYPE_SHIFT 16 /* Register Address (old prot */ +#define IXGBE_MSCA_PHY_ADDR_MASK 0x03E00000 /* PHY Address mask */ +#define IXGBE_MSCA_PHY_ADDR_SHIFT 21 /* PHY Address shift*/ +#define IXGBE_MSCA_OP_CODE_MASK 0x0C000000 /* OP CODE mask */ +#define IXGBE_MSCA_OP_CODE_SHIFT 26 /* OP CODE shift */ +#define IXGBE_MSCA_ADDR_CYCLE 0x00000000 /* OP CODE 00 (addr cycle) */ +#define IXGBE_MSCA_WRITE 0x04000000 /* OP CODE 01 (wr) */ +#define IXGBE_MSCA_READ 0x0C000000 /* OP CODE 11 (rd) */ +#define IXGBE_MSCA_READ_AUTOINC 0x08000000 /* OP CODE 10 (rd auto inc)*/ +#define IXGBE_MSCA_ST_CODE_MASK 0x30000000 /* ST Code mask */ +#define IXGBE_MSCA_ST_CODE_SHIFT 28 /* ST Code shift */ +#define IXGBE_MSCA_NEW_PROTOCOL 0x00000000 /* ST CODE 00 (new prot) */ +#define IXGBE_MSCA_OLD_PROTOCOL 0x10000000 /* ST CODE 01 (old prot) */ +#define IXGBE_MSCA_MDI_COMMAND 0x40000000 /* Initiate MDI command */ +#define IXGBE_MSCA_MDI_IN_PROG_EN 0x80000000 /* MDI in progress ena */ + +/* MSRWD bit masks */ +#define IXGBE_MSRWD_WRITE_DATA_MASK 0x0000FFFF +#define IXGBE_MSRWD_WRITE_DATA_SHIFT 0 +#define IXGBE_MSRWD_READ_DATA_MASK 0xFFFF0000 +#define IXGBE_MSRWD_READ_DATA_SHIFT 16 + +/* Atlas registers */ +#define IXGBE_ATLAS_PDN_LPBK 0x24 +#define IXGBE_ATLAS_PDN_10G 0xB +#define IXGBE_ATLAS_PDN_1G 0xC +#define IXGBE_ATLAS_PDN_AN 0xD + +/* Atlas bit masks */ +#define IXGBE_ATLASCTL_WRITE_CMD 0x00010000 +#define IXGBE_ATLAS_PDN_TX_REG_EN 0x10 +#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL 0xF0 +#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL 0xF0 +#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL 0xF0 + +/* Omer bit masks */ +#define IXGBE_CORECTL_WRITE_CMD 0x00010000 + +/* Device Type definitions for new protocol MDIO commands */ +#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 +#define IXGBE_MDIO_PCS_DEV_TYPE 0x3 +#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4 +#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */ +#define IXGBE_TWINAX_DEV 1 + +#define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */ + +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Ctrl Reg */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018 +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010 + +#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */ +#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */ +#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */ +#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */ +#define IXGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */ +#define IXGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */ +#define IXGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/ +#define IXGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/ +#define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */ +#define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */ +#define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */ +#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */ +#define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */ +#define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */ +#define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */ +#define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */ +#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ + +#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */ + +/* MII clause 22/28 definitions */ +#define IXGBE_MDIO_PHY_LOW_POWER_MODE 0x0800 + +#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */ +#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */ +#define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */ +#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */ +#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/ +#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/ +#define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/ +#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */ +#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */ +#define IXGBE_MII_RESTART 0x200 +#define IXGBE_MII_AUTONEG_COMPLETE 0x20 +#define IXGBE_MII_AUTONEG_LINK_UP 0x04 +#define IXGBE_MII_AUTONEG_REG 0x0 + +#define IXGBE_PHY_REVISION_MASK 0xFFFFFFF0 +#define IXGBE_MAX_PHY_ADDR 32 + +/* PHY IDs*/ +#define TN1010_PHY_ID 0x00A19410 +#define TNX_FW_REV 0xB +#define X540_PHY_ID 0x01540200 +#define AQ_FW_REV 0x20 +#define QT2022_PHY_ID 0x0043A400 +#define ATH_PHY_ID 0x03429050 + +/* PHY Types */ +#define IXGBE_M88E1145_E_PHY_ID 0x01410CD0 + +/* Special PHY Init Routine */ +#define IXGBE_PHY_INIT_OFFSET_NL 0x002B +#define IXGBE_PHY_INIT_END_NL 0xFFFF +#define IXGBE_CONTROL_MASK_NL 0xF000 +#define IXGBE_DATA_MASK_NL 0x0FFF +#define IXGBE_CONTROL_SHIFT_NL 12 +#define IXGBE_DELAY_NL 0 +#define IXGBE_DATA_NL 1 +#define IXGBE_CONTROL_NL 0x000F +#define IXGBE_CONTROL_EOL_NL 0x0FFF +#define IXGBE_CONTROL_SOL_NL 0x0000 + +/* General purpose Interrupt Enable */ +#define IXGBE_SDP0_GPIEN 0x00000001 /* SDP0 */ +#define IXGBE_SDP1_GPIEN 0x00000002 /* SDP1 */ +#define IXGBE_SDP2_GPIEN 0x00000004 /* SDP2 */ +#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */ +#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */ +#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */ +#define IXGBE_GPIE_EIAME 0x40000000 +#define IXGBE_GPIE_PBA_SUPPORT 0x80000000 +#define IXGBE_GPIE_RSC_DELAY_SHIFT 11 +#define IXGBE_GPIE_VTMODE_MASK 0x0000C000 /* VT Mode Mask */ +#define IXGBE_GPIE_VTMODE_16 0x00004000 /* 16 VFs 8 queues per VF */ +#define IXGBE_GPIE_VTMODE_32 0x00008000 /* 32 VFs 4 queues per VF */ +#define IXGBE_GPIE_VTMODE_64 0x0000C000 /* 64 VFs 2 queues per VF */ + +/* Packet Buffer Initialization */ +#define IXGBE_MAX_PACKET_BUFFERS 8 + +#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */ +#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */ +#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */ +#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */ +#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */ +#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */ +#define IXGBE_RXPBSIZE_MAX 0x00080000 /* 512KB Packet Buffer */ +#define IXGBE_TXPBSIZE_MAX 0x00028000 /* 160KB Packet Buffer */ + +#define IXGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */ +#define IXGBE_MAX_PB 8 + +/* Packet buffer allocation strategies */ +enum { + PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */ +#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL + PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */ +#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED +}; + +/* Transmit Flow Control status */ +#define IXGBE_TFCS_TXOFF 0x00000001 +#define IXGBE_TFCS_TXOFF0 0x00000100 +#define IXGBE_TFCS_TXOFF1 0x00000200 +#define IXGBE_TFCS_TXOFF2 0x00000400 +#define IXGBE_TFCS_TXOFF3 0x00000800 +#define IXGBE_TFCS_TXOFF4 0x00001000 +#define IXGBE_TFCS_TXOFF5 0x00002000 +#define IXGBE_TFCS_TXOFF6 0x00004000 +#define IXGBE_TFCS_TXOFF7 0x00008000 + +/* TCP Timer */ +#define IXGBE_TCPTIMER_KS 0x00000100 +#define IXGBE_TCPTIMER_COUNT_ENABLE 0x00000200 +#define IXGBE_TCPTIMER_COUNT_FINISH 0x00000400 +#define IXGBE_TCPTIMER_LOOP 0x00000800 +#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF + +/* HLREG0 Bit Masks */ +#define IXGBE_HLREG0_TXCRCEN 0x00000001 /* bit 0 */ +#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 /* bit 1 */ +#define IXGBE_HLREG0_JUMBOEN 0x00000004 /* bit 2 */ +#define IXGBE_HLREG0_TXPADEN 0x00000400 /* bit 10 */ +#define IXGBE_HLREG0_TXPAUSEEN 0x00001000 /* bit 12 */ +#define IXGBE_HLREG0_RXPAUSEEN 0x00004000 /* bit 14 */ +#define IXGBE_HLREG0_LPBK 0x00008000 /* bit 15 */ +#define IXGBE_HLREG0_MDCSPD 0x00010000 /* bit 16 */ +#define IXGBE_HLREG0_CONTMDC 0x00020000 /* bit 17 */ +#define IXGBE_HLREG0_CTRLFLTR 0x00040000 /* bit 18 */ +#define IXGBE_HLREG0_PREPEND 0x00F00000 /* bits 20-23 */ +#define IXGBE_HLREG0_PRIPAUSEEN 0x01000000 /* bit 24 */ +#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000 /* bits 25-26 */ +#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000 /* bit 27 */ +#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000 /* bit 28 */ + +/* VMD_CTL bitmasks */ +#define IXGBE_VMD_CTL_VMDQ_EN 0x00000001 +#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002 + +/* VT_CTL bitmasks */ +#define IXGBE_VT_CTL_DIS_DEFPL 0x20000000 /* disable default pool */ +#define IXGBE_VT_CTL_REPLEN 0x40000000 /* replication enabled */ +#define IXGBE_VT_CTL_VT_ENABLE 0x00000001 /* Enable VT Mode */ +#define IXGBE_VT_CTL_POOL_SHIFT 7 +#define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT) + +/* VMOLR bitmasks */ +#define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */ +#define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */ +#define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */ +#define IXGBE_VMOLR_BAM 0x08000000 /* accept broadcast packets */ +#define IXGBE_VMOLR_MPE 0x10000000 /* multicast promiscuous */ + +/* VFRE bitmask */ +#define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF + +#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ + +/* RDHMPN and TDHMPN bitmasks */ +#define IXGBE_RDHMPN_RDICADDR 0x007FF800 +#define IXGBE_RDHMPN_RDICRDREQ 0x00800000 +#define IXGBE_RDHMPN_RDICADDR_SHIFT 11 +#define IXGBE_TDHMPN_TDICADDR 0x003FF800 +#define IXGBE_TDHMPN_TDICRDREQ 0x00800000 +#define IXGBE_TDHMPN_TDICADDR_SHIFT 11 + +#define IXGBE_RDMAM_MEM_SEL_SHIFT 13 +#define IXGBE_RDMAM_DWORD_SHIFT 9 +#define IXGBE_RDMAM_DESC_COMP_FIFO 1 +#define IXGBE_RDMAM_DFC_CMD_FIFO 2 +#define IXGBE_RDMAM_RSC_HEADER_ADDR 3 +#define IXGBE_RDMAM_TCN_STATUS_RAM 4 +#define IXGBE_RDMAM_WB_COLL_FIFO 5 +#define IXGBE_RDMAM_QSC_CNT_RAM 6 +#define IXGBE_RDMAM_QSC_FCOE_RAM 7 +#define IXGBE_RDMAM_QSC_QUEUE_CNT 8 +#define IXGBE_RDMAM_QSC_QUEUE_RAM 0xA +#define IXGBE_RDMAM_QSC_RSC_RAM 0xB +#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE 135 +#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT 4 +#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE 48 +#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT 7 +#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE 32 +#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT 4 +#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE 256 +#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT 9 +#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE 8 +#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT 4 +#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE 64 +#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT 4 +#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE 512 +#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT 5 +#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE 32 +#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT 4 +#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE 128 +#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT 8 +#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE 32 +#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT 8 + +#define IXGBE_TXDESCIC_READY 0x80000000 + +/* Receive Checksum Control */ +#define IXGBE_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ +#define IXGBE_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ + +/* FCRTL Bit Masks */ +#define IXGBE_FCRTL_XONE 0x80000000 /* XON enable */ +#define IXGBE_FCRTH_FCEN 0x80000000 /* Packet buffer fc enable */ + +/* PAP bit masks*/ +#define IXGBE_PAP_TXPAUSECNT_MASK 0x0000FFFF /* Pause counter mask */ + +/* RMCS Bit Masks */ +#define IXGBE_RMCS_RRM 0x00000002 /* Rx Recycle Mode enable */ +/* Receive Arbitration Control: 0 Round Robin, 1 DFP */ +#define IXGBE_RMCS_RAC 0x00000004 +/* Deficit Fixed Prio ena */ +#define IXGBE_RMCS_DFP IXGBE_RMCS_RAC +#define IXGBE_RMCS_TFCE_802_3X 0x00000008 /* Tx Priority FC ena */ +#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 /* Tx Priority FC ena */ +#define IXGBE_RMCS_ARBDIS 0x00000040 /* Arbitration disable bit */ + +/* FCCFG Bit Masks */ +#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 /* Tx link FC enable */ +#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 /* Tx priority FC enable */ + +/* Interrupt register bitmasks */ + +/* Extended Interrupt Cause Read */ +#define IXGBE_EICR_RTX_QUEUE 0x0000FFFF /* RTx Queue Interrupt */ +#define IXGBE_EICR_FLOW_DIR 0x00010000 /* FDir Exception */ +#define IXGBE_EICR_RX_MISS 0x00020000 /* Packet Buffer Overrun */ +#define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */ +#define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */ +#define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ +#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ +#define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ +#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ +#define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */ +#define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */ +#define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */ +#define IXGBE_EICR_ECC 0x10000000 /* ECC Error */ +#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */ +#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */ +#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ +#define IXGBE_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ + +/* Extended Interrupt Cause Set */ +#define IXGBE_EICS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EICS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EICS_RX_MISS IXGBE_EICR_RX_MISS /* Pkt Buffer Overrun */ +#define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EICS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EICS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EICS_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EICS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EICS_DHER IXGBE_EICR_DHER /* Desc Handler Error */ +#define IXGBE_EICS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EICS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +/* Extended Interrupt Mask Set */ +#define IXGBE_EIMS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EIMS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EIMS_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ +#define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */ +#define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EIMS_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EIMS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EIMS_DHER IXGBE_EICR_DHER /* Descr Handler Error */ +#define IXGBE_EIMS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EIMS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +/* Extended Interrupt Mask Clear */ +#define IXGBE_EIMC_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EIMC_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EIMC_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ +#define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EIMC_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EIMC_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EIMC_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EIMC_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EIMC_DHER IXGBE_EICR_DHER /* Desc Handler Err */ +#define IXGBE_EIMC_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EIMC_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +#define IXGBE_EIMS_ENABLE_MASK ( \ + IXGBE_EIMS_RTX_QUEUE | \ + IXGBE_EIMS_LSC | \ + IXGBE_EIMS_TCP_TIMER | \ + IXGBE_EIMS_OTHER) + +/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ +#define IXGBE_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ +#define IXGBE_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ +#define IXGBE_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ +#define IXGBE_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ +#define IXGBE_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ +#define IXGBE_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ +#define IXGBE_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ +#define IXGBE_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ +#define IXGBE_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ +#define IXGBE_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of control bits */ +#define IXGBE_IMIR_SIZE_BP_82599 0x00001000 /* Packet size bypass */ +#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */ +#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */ +#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */ +#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */ +#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */ +#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */ +#define IXGBE_IMIR_CTRL_BP_82599 0x00080000 /* Bypass chk of ctrl bits */ +#define IXGBE_IMIR_LLI_EN_82599 0x00100000 /* Enables low latency Int */ +#define IXGBE_IMIR_RX_QUEUE_MASK_82599 0x0000007F /* Rx Queue Mask */ +#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */ +#define IXGBE_IMIRVP_PRIORITY_MASK 0x00000007 /* VLAN priority mask */ +#define IXGBE_IMIRVP_PRIORITY_EN 0x00000008 /* VLAN priority enable */ + +#define IXGBE_MAX_FTQF_FILTERS 128 +#define IXGBE_FTQF_PROTOCOL_MASK 0x00000003 +#define IXGBE_FTQF_PROTOCOL_TCP 0x00000000 +#define IXGBE_FTQF_PROTOCOL_UDP 0x00000001 +#define IXGBE_FTQF_PROTOCOL_SCTP 2 +#define IXGBE_FTQF_PRIORITY_MASK 0x00000007 +#define IXGBE_FTQF_PRIORITY_SHIFT 2 +#define IXGBE_FTQF_POOL_MASK 0x0000003F +#define IXGBE_FTQF_POOL_SHIFT 8 +#define IXGBE_FTQF_5TUPLE_MASK_MASK 0x0000001F +#define IXGBE_FTQF_5TUPLE_MASK_SHIFT 25 +#define IXGBE_FTQF_SOURCE_ADDR_MASK 0x1E +#define IXGBE_FTQF_DEST_ADDR_MASK 0x1D +#define IXGBE_FTQF_SOURCE_PORT_MASK 0x1B +#define IXGBE_FTQF_DEST_PORT_MASK 0x17 +#define IXGBE_FTQF_PROTOCOL_COMP_MASK 0x0F +#define IXGBE_FTQF_POOL_MASK_EN 0x40000000 +#define IXGBE_FTQF_QUEUE_ENABLE 0x80000000 + +/* Interrupt clear mask */ +#define IXGBE_IRQ_CLEAR_MASK 0xFFFFFFFF + +/* Interrupt Vector Allocation Registers */ +#define IXGBE_IVAR_REG_NUM 25 +#define IXGBE_IVAR_REG_NUM_82599 64 +#define IXGBE_IVAR_TXRX_ENTRY 96 +#define IXGBE_IVAR_RX_ENTRY 64 +#define IXGBE_IVAR_RX_QUEUE(_i) (0 + (_i)) +#define IXGBE_IVAR_TX_QUEUE(_i) (64 + (_i)) +#define IXGBE_IVAR_TX_ENTRY 32 + +#define IXGBE_IVAR_TCP_TIMER_INDEX 96 /* 0 based index */ +#define IXGBE_IVAR_OTHER_CAUSES_INDEX 97 /* 0 based index */ + +#define IXGBE_MSIX_VECTOR(_i) (0 + (_i)) + +#define IXGBE_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */ + +/* ETYPE Queue Filter/Select Bit Masks */ +#define IXGBE_MAX_ETQF_FILTERS 8 +#define IXGBE_ETQF_FCOE 0x08000000 /* bit 27 */ +#define IXGBE_ETQF_BCN 0x10000000 /* bit 28 */ +#define IXGBE_ETQF_1588 0x40000000 /* bit 30 */ +#define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */ +#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */ + +#define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */ +#define IXGBE_ETQS_RX_QUEUE_SHIFT 16 +#define IXGBE_ETQS_LLI 0x20000000 /* bit 29 */ +#define IXGBE_ETQS_QUEUE_EN 0x80000000 /* bit 31 */ + +/* + * ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters + * here!! + * + * Current filters: + * EAPOL 802.1x (0x888e): Filter 0 + * FCoE (0x8906): Filter 2 + * 1588 (0x88f7): Filter 3 + * FIP (0x8914): Filter 4 + */ +#define IXGBE_ETQF_FILTER_EAPOL 0 +#define IXGBE_ETQF_FILTER_FCOE 2 +#define IXGBE_ETQF_FILTER_1588 3 +#define IXGBE_ETQF_FILTER_FIP 4 +/* VLAN Control Bit Masks */ +#define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */ +#define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */ +#define IXGBE_VLNCTRL_CFIEN 0x20000000 /* bit 29 */ +#define IXGBE_VLNCTRL_VFE 0x40000000 /* bit 30 */ +#define IXGBE_VLNCTRL_VME 0x80000000 /* bit 31 */ + +/* VLAN pool filtering masks */ +#define IXGBE_VLVF_VIEN 0x80000000 /* filter is valid */ +#define IXGBE_VLVF_ENTRIES 64 +#define IXGBE_VLVF_VLANID_MASK 0x00000FFF +/* Per VF Port VLAN insertion rules */ +#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ +#define IXGBE_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ + +#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */ + +/* STATUS Bit Masks */ +#define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */ +#define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/ +#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Ena Status */ + +#define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */ +#define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ + +/* ESDP Bit Masks */ +#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */ +#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */ +#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */ +#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */ +#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */ +#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */ +#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */ +#define IXGBE_ESDP_SDP0_DIR 0x00000100 /* SDP0 IO direction */ +#define IXGBE_ESDP_SDP1_DIR 0x00000200 /* SDP1 IO direction */ +#define IXGBE_ESDP_SDP4_DIR 0x00001000 /* SDP4 IO direction */ +#define IXGBE_ESDP_SDP5_DIR 0x00002000 /* SDP5 IO direction */ +#define IXGBE_ESDP_SDP0_NATIVE 0x00010000 /* SDP0 IO mode */ +#define IXGBE_ESDP_SDP1_NATIVE 0x00020000 /* SDP1 IO mode */ + + +/* LEDCTL Bit Masks */ +#define IXGBE_LED_IVRT_BASE 0x00000040 +#define IXGBE_LED_BLINK_BASE 0x00000080 +#define IXGBE_LED_MODE_MASK_BASE 0x0000000F +#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i))) +#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i)) +#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) +#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) +#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) + +/* LED modes */ +#define IXGBE_LED_LINK_UP 0x0 +#define IXGBE_LED_LINK_10G 0x1 +#define IXGBE_LED_MAC 0x2 +#define IXGBE_LED_FILTER 0x3 +#define IXGBE_LED_LINK_ACTIVE 0x4 +#define IXGBE_LED_LINK_1G 0x5 +#define IXGBE_LED_ON 0xE +#define IXGBE_LED_OFF 0xF + +/* AUTOC Bit Masks */ +#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000 +#define IXGBE_AUTOC_KX4_SUPP 0x80000000 +#define IXGBE_AUTOC_KX_SUPP 0x40000000 +#define IXGBE_AUTOC_PAUSE 0x30000000 +#define IXGBE_AUTOC_ASM_PAUSE 0x20000000 +#define IXGBE_AUTOC_SYM_PAUSE 0x10000000 +#define IXGBE_AUTOC_RF 0x08000000 +#define IXGBE_AUTOC_PD_TMR 0x06000000 +#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000 +#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000 +#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000 +#define IXGBE_AUTOC_FECA 0x00040000 +#define IXGBE_AUTOC_FECR 0x00020000 +#define IXGBE_AUTOC_KR_SUPP 0x00010000 +#define IXGBE_AUTOC_AN_RESTART 0x00001000 +#define IXGBE_AUTOC_FLU 0x00000001 +#define IXGBE_AUTOC_LMS_SHIFT 13 +#define IXGBE_AUTOC_LMS_10G_SERIAL (0x3 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR (0x4 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_SGMII_1G_100M (0x5 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_MASK (0x7 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN (0x0 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN (0x1 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_1G_AN (0x2 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_AN (0x4 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_ATTACH_TYPE (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) + +#define IXGBE_AUTOC_1G_PMA_PMD_MASK 0x00000200 +#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9 +#define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180 +#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7 +#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) + +#define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000 +#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000 +#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16 +#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) + +#define IXGBE_MACC_FLU 0x00000001 +#define IXGBE_MACC_FSV_10G 0x00030000 +#define IXGBE_MACC_FS 0x00040000 +#define IXGBE_MAC_RX2TX_LPBK 0x00000002 + +/* LINKS Bit Masks */ +#define IXGBE_LINKS_KX_AN_COMP 0x80000000 +#define IXGBE_LINKS_UP 0x40000000 +#define IXGBE_LINKS_SPEED 0x20000000 +#define IXGBE_LINKS_MODE 0x18000000 +#define IXGBE_LINKS_RX_MODE 0x06000000 +#define IXGBE_LINKS_TX_MODE 0x01800000 +#define IXGBE_LINKS_XGXS_EN 0x00400000 +#define IXGBE_LINKS_SGMII_EN 0x02000000 +#define IXGBE_LINKS_PCS_1G_EN 0x00200000 +#define IXGBE_LINKS_1G_AN_EN 0x00100000 +#define IXGBE_LINKS_KX_AN_IDLE 0x00080000 +#define IXGBE_LINKS_1G_SYNC 0x00040000 +#define IXGBE_LINKS_10G_ALIGN 0x00020000 +#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000 +#define IXGBE_LINKS_TL_FAULT 0x00001000 +#define IXGBE_LINKS_SIGNAL 0x00000F00 + +#define IXGBE_LINKS_SPEED_82599 0x30000000 +#define IXGBE_LINKS_SPEED_10G_82599 0x30000000 +#define IXGBE_LINKS_SPEED_1G_82599 0x20000000 +#define IXGBE_LINKS_SPEED_100_82599 0x10000000 +#define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */ +#define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */ + +#define IXGBE_LINKS2_AN_SUPPORTED 0x00000040 + +/* PCS1GLSTA Bit Masks */ +#define IXGBE_PCS1GLSTA_LINK_OK 1 +#define IXGBE_PCS1GLSTA_SYNK_OK 0x10 +#define IXGBE_PCS1GLSTA_AN_COMPLETE 0x10000 +#define IXGBE_PCS1GLSTA_AN_PAGE_RX 0x20000 +#define IXGBE_PCS1GLSTA_AN_TIMED_OUT 0x40000 +#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000 +#define IXGBE_PCS1GLSTA_AN_ERROR_RWS 0x100000 + +#define IXGBE_PCS1GANA_SYM_PAUSE 0x80 +#define IXGBE_PCS1GANA_ASM_PAUSE 0x100 + +/* PCS1GLCTL Bit Masks */ +#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to en */ +#define IXGBE_PCS1GLCTL_FLV_LINK_UP 1 +#define IXGBE_PCS1GLCTL_FORCE_LINK 0x20 +#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH 0x40 +#define IXGBE_PCS1GLCTL_AN_ENABLE 0x10000 +#define IXGBE_PCS1GLCTL_AN_RESTART 0x20000 + +/* ANLP1 Bit Masks */ +#define IXGBE_ANLP1_PAUSE 0x0C00 +#define IXGBE_ANLP1_SYM_PAUSE 0x0400 +#define IXGBE_ANLP1_ASM_PAUSE 0x0800 +#define IXGBE_ANLP1_AN_STATE_MASK 0x000f0000 + +/* SW Semaphore Register bitmasks */ +#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ +#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ +#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */ + +/* SW_FW_SYNC/GSSR definitions */ +#define IXGBE_GSSR_EEP_SM 0x0001 +#define IXGBE_GSSR_PHY0_SM 0x0002 +#define IXGBE_GSSR_PHY1_SM 0x0004 +#define IXGBE_GSSR_MAC_CSR_SM 0x0008 +#define IXGBE_GSSR_FLASH_SM 0x0010 +#define IXGBE_GSSR_SW_MNG_SM 0x0400 + +/* FW Status register bitmask */ +#define IXGBE_FWSTS_FWRI 0x00000200 /* Firmware Reset Indication */ + +/* EEC Register */ +#define IXGBE_EEC_SK 0x00000001 /* EEPROM Clock */ +#define IXGBE_EEC_CS 0x00000002 /* EEPROM Chip Select */ +#define IXGBE_EEC_DI 0x00000004 /* EEPROM Data In */ +#define IXGBE_EEC_DO 0x00000008 /* EEPROM Data Out */ +#define IXGBE_EEC_FWE_MASK 0x00000030 /* FLASH Write Enable */ +#define IXGBE_EEC_FWE_DIS 0x00000010 /* Disable FLASH writes */ +#define IXGBE_EEC_FWE_EN 0x00000020 /* Enable FLASH writes */ +#define IXGBE_EEC_FWE_SHIFT 4 +#define IXGBE_EEC_REQ 0x00000040 /* EEPROM Access Request */ +#define IXGBE_EEC_GNT 0x00000080 /* EEPROM Access Grant */ +#define IXGBE_EEC_PRES 0x00000100 /* EEPROM Present */ +#define IXGBE_EEC_ARD 0x00000200 /* EEPROM Auto Read Done */ +#define IXGBE_EEC_FLUP 0x00800000 /* Flash update command */ +#define IXGBE_EEC_SEC1VAL 0x02000000 /* Sector 1 Valid */ +#define IXGBE_EEC_FLUDONE 0x04000000 /* Flash update done */ +/* EEPROM Addressing bits based on type (0-small, 1-large) */ +#define IXGBE_EEC_ADDR_SIZE 0x00000400 +#define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */ +#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. */ + +#define IXGBE_EEC_SIZE_SHIFT 11 +#define IXGBE_EEPROM_WORD_SIZE_SHIFT 6 +#define IXGBE_EEPROM_OPCODE_BITS 8 + +/* Part Number String Length */ +#define IXGBE_PBANUM_LENGTH 11 + +/* Checksum and EEPROM pointers */ +#define IXGBE_PBANUM_PTR_GUARD 0xFAFA +#define IXGBE_EEPROM_CHECKSUM 0x3F +#define IXGBE_EEPROM_SUM 0xBABA +#define IXGBE_PCIE_ANALOG_PTR 0x03 +#define IXGBE_ATLAS0_CONFIG_PTR 0x04 +#define IXGBE_PHY_PTR 0x04 +#define IXGBE_ATLAS1_CONFIG_PTR 0x05 +#define IXGBE_OPTION_ROM_PTR 0x05 +#define IXGBE_PCIE_GENERAL_PTR 0x06 +#define IXGBE_PCIE_CONFIG0_PTR 0x07 +#define IXGBE_PCIE_CONFIG1_PTR 0x08 +#define IXGBE_CORE0_PTR 0x09 +#define IXGBE_CORE1_PTR 0x0A +#define IXGBE_MAC0_PTR 0x0B +#define IXGBE_MAC1_PTR 0x0C +#define IXGBE_CSR0_CONFIG_PTR 0x0D +#define IXGBE_CSR1_CONFIG_PTR 0x0E +#define IXGBE_FW_PTR 0x0F +#define IXGBE_PBANUM0_PTR 0x15 +#define IXGBE_PBANUM1_PTR 0x16 +#define IXGBE_ALT_MAC_ADDR_PTR 0x37 +#define IXGBE_FREE_SPACE_PTR 0X3E + +/* External Thermal Sensor Config */ +#define IXGBE_ETS_CFG 0x26 +#define IXGBE_ETS_LTHRES_DELTA_MASK 0x07C0 +#define IXGBE_ETS_LTHRES_DELTA_SHIFT 6 +#define IXGBE_ETS_TYPE_MASK 0x0038 +#define IXGBE_ETS_TYPE_SHIFT 3 +#define IXGBE_ETS_TYPE_EMC 0x000 +#define IXGBE_ETS_NUM_SENSORS_MASK 0x0007 +#define IXGBE_ETS_DATA_LOC_MASK 0x3C00 +#define IXGBE_ETS_DATA_LOC_SHIFT 10 +#define IXGBE_ETS_DATA_INDEX_MASK 0x0300 +#define IXGBE_ETS_DATA_INDEX_SHIFT 8 +#define IXGBE_ETS_DATA_HTHRESH_MASK 0x00FF + +#define IXGBE_SAN_MAC_ADDR_PTR 0x28 +#define IXGBE_DEVICE_CAPS 0x2C +#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_PCIE_MSIX_82599_CAPS 0x72 +#define IXGBE_MAX_MSIX_VECTORS_82599 0x40 +#define IXGBE_PCIE_MSIX_82598_CAPS 0x62 +#define IXGBE_MAX_MSIX_VECTORS_82598 0x13 + +/* MSI-X capability fields masks */ +#define IXGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF + +/* Legacy EEPROM word offsets */ +#define IXGBE_ISCSI_BOOT_CAPS 0x0033 +#define IXGBE_ISCSI_SETUP_PORT_0 0x0030 +#define IXGBE_ISCSI_SETUP_PORT_1 0x0034 + +/* EEPROM Commands - SPI */ +#define IXGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */ +#define IXGBE_EEPROM_STATUS_RDY_SPI 0x01 +#define IXGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */ +#define IXGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */ +#define IXGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */ +#define IXGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */ +/* EEPROM reset Write Enable latch */ +#define IXGBE_EEPROM_WRDI_OPCODE_SPI 0x04 +#define IXGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */ +#define IXGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */ +#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */ +#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */ +#define IXGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */ + +/* EEPROM Read Register */ +#define IXGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */ +#define IXGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */ +#define IXGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */ +#define IXGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define IXGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */ +#define IXGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */ + +#define IXGBE_ETH_LENGTH_OF_ADDRESS 6 + +#define IXGBE_EEPROM_PAGE_SIZE_MAX 128 +#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* words rd in burst */ +#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */ + +#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS +#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */ +#endif + +#ifndef IXGBE_EERD_EEWR_ATTEMPTS +/* Number of 5 microseconds we wait for EERD read and + * EERW write to complete */ +#define IXGBE_EERD_EEWR_ATTEMPTS 100000 +#endif + +#ifndef IXGBE_FLUDONE_ATTEMPTS +/* # attempts we wait for flush update to complete */ +#define IXGBE_FLUDONE_ATTEMPTS 20000 +#endif + +#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */ +#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */ +#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */ +#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */ + +#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0 +#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3 +#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1 +#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2 +#define IXGBE_FW_LESM_PARAMETERS_PTR 0x2 +#define IXGBE_FW_LESM_STATE_1 0x1 +#define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */ +#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4 +#define IXGBE_FW_PATCH_VERSION_4 0x7 +#define IXGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */ +#define IXGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */ +#define IXGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */ +#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */ +#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */ +#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x27 /* Alt. SAN MAC block */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */ +#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */ + +#define IXGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */ +#define IXGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */ +#define IXGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */ + +/* PCI Bus Info */ +#define IXGBE_PCI_DEVICE_STATUS 0xAA +#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 +#define IXGBE_PCI_LINK_STATUS 0xB2 +#define IXGBE_PCI_DEVICE_CONTROL2 0xC8 +#define IXGBE_PCI_LINK_WIDTH 0x3F0 +#define IXGBE_PCI_LINK_WIDTH_1 0x10 +#define IXGBE_PCI_LINK_WIDTH_2 0x20 +#define IXGBE_PCI_LINK_WIDTH_4 0x40 +#define IXGBE_PCI_LINK_WIDTH_8 0x80 +#define IXGBE_PCI_LINK_SPEED 0xF +#define IXGBE_PCI_LINK_SPEED_2500 0x1 +#define IXGBE_PCI_LINK_SPEED_5000 0x2 +#define IXGBE_PCI_LINK_SPEED_8000 0x3 +#define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E +#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 +#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 + +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 + +/* Check whether address is multicast. This is little-endian specific check.*/ +#define IXGBE_IS_MULTICAST(Address) \ + (bool)(((u8 *)(Address))[0] & ((u8)0x01)) + +/* Check whether an address is broadcast. */ +#define IXGBE_IS_BROADCAST(Address) \ + ((((u8 *)(Address))[0] == ((u8)0xff)) && \ + (((u8 *)(Address))[1] == ((u8)0xff))) + +/* RAH */ +#define IXGBE_RAH_VIND_MASK 0x003C0000 +#define IXGBE_RAH_VIND_SHIFT 18 +#define IXGBE_RAH_AV 0x80000000 +#define IXGBE_CLEAR_VMDQ_ALL 0xFFFFFFFF + +/* Header split receive */ +#define IXGBE_RFCTL_ISCSI_DIS 0x00000001 +#define IXGBE_RFCTL_ISCSI_DWC_MASK 0x0000003E +#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1 +#define IXGBE_RFCTL_RSC_DIS 0x00000010 +#define IXGBE_RFCTL_NFSW_DIS 0x00000040 +#define IXGBE_RFCTL_NFSR_DIS 0x00000080 +#define IXGBE_RFCTL_NFS_VER_MASK 0x00000300 +#define IXGBE_RFCTL_NFS_VER_SHIFT 8 +#define IXGBE_RFCTL_NFS_VER_2 0 +#define IXGBE_RFCTL_NFS_VER_3 1 +#define IXGBE_RFCTL_NFS_VER_4 2 +#define IXGBE_RFCTL_IPV6_DIS 0x00000400 +#define IXGBE_RFCTL_IPV6_XSUM_DIS 0x00000800 +#define IXGBE_RFCTL_IPFRSP_DIS 0x00004000 +#define IXGBE_RFCTL_IPV6_EX_DIS 0x00010000 +#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 + +/* Transmit Config masks */ +#define IXGBE_TXDCTL_ENABLE 0x02000000 /* Ena specific Tx Queue */ +#define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */ +#define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */ +/* Enable short packet padding to 64 bytes */ +#define IXGBE_TX_PAD_ENABLE 0x00000400 +#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004 /* Allow jumbo frames */ +/* This allows for 16K packets + 4k for vlan */ +#define IXGBE_MAX_FRAME_SZ 0x40040000 + +#define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */ +#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */ + +/* Receive Config masks */ +#define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */ +#define IXGBE_RXCTRL_DMBYPS 0x00000002 /* Desc Monitor Bypass */ +#define IXGBE_RXDCTL_ENABLE 0x02000000 /* Ena specific Rx Queue */ +#define IXGBE_RXDCTL_SWFLSH 0x04000000 /* Rx Desc wr-bk flushing */ +#define IXGBE_RXDCTL_RLPMLMASK 0x00003FFF /* X540 supported only */ +#define IXGBE_RXDCTL_RLPML_EN 0x00008000 +#define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */ + +#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ + +#define IXGBE_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ + +#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF +#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 +#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG 0x01 +#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG 0x02 +#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG 0x03 +#define IXGBE_RXMTRL_V1_MGMT_MSG 0x04 + +#define IXGBE_RXMTRL_V2_MSGID_MASK 0x0000FF00 +#define IXGBE_RXMTRL_V2_SYNC_MSG 0x0000 +#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG 0x0100 +#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG 0x0200 +#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG 0x0300 +#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG 0x0800 +#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG 0x0900 +#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00 +#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG 0x0B00 +#define IXGBE_RXMTRL_V2_SIGNALLING_MSG 0x0C00 +#define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00 + +#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */ +#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/ +#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */ +#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */ +#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */ +#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */ +/* Receive Priority Flow Control Enable */ +#define IXGBE_FCTRL_RPFCE 0x00004000 +#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */ +#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */ +#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */ +#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */ +#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */ +#define IXGBE_MFLCN_RPFCE_MASK 0x00000FF4 /* Rx Priority FC bitmap mask */ +#define IXGBE_MFLCN_RPFCE_SHIFT 4 /* Rx Priority FC bitmap shift */ + +/* Multiple Receive Queue Control */ +#define IXGBE_MRQC_RSSEN 0x00000001 /* RSS Enable */ +#define IXGBE_MRQC_MRQE_MASK 0xF /* Bits 3:0 */ +#define IXGBE_MRQC_RT8TCEN 0x00000002 /* 8 TC no RSS */ +#define IXGBE_MRQC_RT4TCEN 0x00000003 /* 4 TC no RSS */ +#define IXGBE_MRQC_RTRSS8TCEN 0x00000004 /* 8 TC w/ RSS */ +#define IXGBE_MRQC_RTRSS4TCEN 0x00000005 /* 4 TC w/ RSS */ +#define IXGBE_MRQC_VMDQEN 0x00000008 /* VMDq2 64 pools no RSS */ +#define IXGBE_MRQC_VMDQRSS32EN 0x0000000A /* VMDq2 32 pools w/ RSS */ +#define IXGBE_MRQC_VMDQRSS64EN 0x0000000B /* VMDq2 64 pools w/ RSS */ +#define IXGBE_MRQC_VMDQRT8TCEN 0x0000000C /* VMDq2/RT 16 pool 8 TC */ +#define IXGBE_MRQC_VMDQRT4TCEN 0x0000000D /* VMDq2/RT 32 pool 4 TC */ +#define IXGBE_MRQC_RSS_FIELD_MASK 0xFFFF0000 +#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 +#define IXGBE_MRQC_RSS_FIELD_IPV4 0x00020000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX 0x00080000 +#define IXGBE_MRQC_RSS_FIELD_IPV6 0x00100000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 +#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000 +#define IXGBE_MRQC_L3L4TXSWEN 0x00008000 + +/* Queue Drop Enable */ +#define IXGBE_QDE_ENABLE 0x00000001 +#define IXGBE_QDE_IDX_MASK 0x00007F00 +#define IXGBE_QDE_IDX_SHIFT 8 +#define IXGBE_QDE_WRITE 0x00010000 +#define IXGBE_QDE_READ 0x00020000 + +#define IXGBE_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define IXGBE_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define IXGBE_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define IXGBE_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define IXGBE_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define IXGBE_TXD_CMD_RS 0x08000000 /* Report Status */ +#define IXGBE_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ +#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */ + +#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000 +#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 +#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 +#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000 +#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 +/* Multiple Transmit Queue Command Register */ +#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */ +#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */ +#define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */ +#define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */ +#define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */ +#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */ +#define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */ + +/* Receive Descriptor bit definitions */ +#define IXGBE_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define IXGBE_RXD_STAT_EOP 0x02 /* End of Packet */ +#define IXGBE_RXD_STAT_FLM 0x04 /* FDir Match */ +#define IXGBE_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define IXGBE_RXDADV_NEXTP_MASK 0x000FFFF0 /* Next Descriptor Index */ +#define IXGBE_RXDADV_NEXTP_SHIFT 0x00000004 +#define IXGBE_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ +#define IXGBE_RXD_STAT_L4CS 0x20 /* L4 xsum calculated */ +#define IXGBE_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define IXGBE_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define IXGBE_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */ +#define IXGBE_RXD_STAT_VEXT 0x200 /* 1st VLAN found */ +#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ +#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ +#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ +#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ +#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ +#define IXGBE_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ +#define IXGBE_RXD_ERR_CE 0x01 /* CRC Error */ +#define IXGBE_RXD_ERR_LE 0x02 /* Length Error */ +#define IXGBE_RXD_ERR_PE 0x08 /* Packet Error */ +#define IXGBE_RXD_ERR_OSE 0x10 /* Oversize Error */ +#define IXGBE_RXD_ERR_USE 0x20 /* Undersize Error */ +#define IXGBE_RXD_ERR_TCPE 0x40 /* TCP/UDP Checksum Error */ +#define IXGBE_RXD_ERR_IPE 0x80 /* IP Checksum Error */ +#define IXGBE_RXDADV_ERR_MASK 0xfff00000 /* RDESC.ERRORS mask */ +#define IXGBE_RXDADV_ERR_SHIFT 20 /* RDESC.ERRORS shift */ +#define IXGBE_RXDADV_ERR_RXE 0x20000000 /* Any MAC Error */ +#define IXGBE_RXDADV_ERR_FCEOFE 0x80000000 /* FCoEFe/IPE */ +#define IXGBE_RXDADV_ERR_FCERR 0x00700000 /* FCERR/FDIRERR */ +#define IXGBE_RXDADV_ERR_FDIR_LEN 0x00100000 /* FDIR Length error */ +#define IXGBE_RXDADV_ERR_FDIR_DROP 0x00200000 /* FDIR Drop error */ +#define IXGBE_RXDADV_ERR_FDIR_COLL 0x00400000 /* FDIR Collision error */ +#define IXGBE_RXDADV_ERR_HBO 0x00800000 /*Header Buffer Overflow */ +#define IXGBE_RXDADV_ERR_CE 0x01000000 /* CRC Error */ +#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */ +#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */ +#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */ +#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */ +#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */ +#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */ +#define IXGBE_RXD_VLAN_ID_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ +#define IXGBE_RXD_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ +#define IXGBE_RXD_PRI_SHIFT 13 +#define IXGBE_RXD_CFI_MASK 0x1000 /* CFI is bit 12 */ +#define IXGBE_RXD_CFI_SHIFT 12 + +#define IXGBE_RXDADV_STAT_DD IXGBE_RXD_STAT_DD /* Done */ +#define IXGBE_RXDADV_STAT_EOP IXGBE_RXD_STAT_EOP /* End of Packet */ +#define IXGBE_RXDADV_STAT_FLM IXGBE_RXD_STAT_FLM /* FDir Match */ +#define IXGBE_RXDADV_STAT_VP IXGBE_RXD_STAT_VP /* IEEE VLAN Pkt */ +#define IXGBE_RXDADV_STAT_MASK 0x000fffff /* Stat/NEXTP: bit 0-19 */ +#define IXGBE_RXDADV_STAT_FCEOFS 0x00000040 /* FCoE EOF/SOF Stat */ +#define IXGBE_RXDADV_STAT_FCSTAT 0x00000030 /* FCoE Pkt Stat */ +#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */ +#define IXGBE_RXDADV_STAT_FCSTAT_NODDP 0x00000010 /* 01: Ctxt w/o DDP */ +#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */ +#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */ +#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE1588 Time Stamp */ + +/* PSRTYPE bit definitions */ +#define IXGBE_PSRTYPE_TCPHDR 0x00000010 +#define IXGBE_PSRTYPE_UDPHDR 0x00000020 +#define IXGBE_PSRTYPE_IPV4HDR 0x00000100 +#define IXGBE_PSRTYPE_IPV6HDR 0x00000200 +#define IXGBE_PSRTYPE_L2HDR 0x00001000 + +/* SRRCTL bit definitions */ +#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_RDMTS_SHIFT 22 +#define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 +#define IXGBE_SRRCTL_DROP_EN 0x10000000 +#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F +#define IXGBE_SRRCTL_BSIZEHDR_MASK 0x00003F00 +#define IXGBE_SRRCTL_DESCTYPE_LEGACY 0x00000000 +#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 +#define IXGBE_SRRCTL_DESCTYPE_MASK 0x0E000000 + +#define IXGBE_RXDPS_HDRSTAT_HDRSP 0x00008000 +#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF + +#define IXGBE_RXDADV_RSSTYPE_MASK 0x0000000F +#define IXGBE_RXDADV_PKTTYPE_MASK 0x0000FFF0 +#define IXGBE_RXDADV_PKTTYPE_MASK_EX 0x0001FFF0 +#define IXGBE_RXDADV_HDRBUFLEN_MASK 0x00007FE0 +#define IXGBE_RXDADV_RSCCNT_MASK 0x001E0000 +#define IXGBE_RXDADV_RSCCNT_SHIFT 17 +#define IXGBE_RXDADV_HDRBUFLEN_SHIFT 5 +#define IXGBE_RXDADV_SPLITHEADER_EN 0x00001000 +#define IXGBE_RXDADV_SPH 0x8000 + +/* RSS Hash results */ +#define IXGBE_RXDADV_RSSTYPE_NONE 0x00000000 +#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 +#define IXGBE_RXDADV_RSSTYPE_IPV4 0x00000002 +#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 +#define IXGBE_RXDADV_RSSTYPE_IPV6_EX 0x00000004 +#define IXGBE_RXDADV_RSSTYPE_IPV6 0x00000005 +#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 +#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 +#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 +#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 + +/* RSS Packet Types as indicated in the receive descriptor. */ +#define IXGBE_RXDADV_PKTTYPE_NONE 0x00000000 +#define IXGBE_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPv4 hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPv4 hdr + extensions */ +#define IXGBE_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPv6 hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPv6 hdr + extensions */ +#define IXGBE_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ +#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ +#define IXGBE_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ +#define IXGBE_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ +#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ +#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ + +/* Security Processing bit Indication */ +#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000 +#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 + +/* Masks to determine if packets should be dropped due to frame errors */ +#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \ + IXGBE_RXD_ERR_CE | \ + IXGBE_RXD_ERR_LE | \ + IXGBE_RXD_ERR_PE | \ + IXGBE_RXD_ERR_OSE | \ + IXGBE_RXD_ERR_USE) + +#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \ + IXGBE_RXDADV_ERR_CE | \ + IXGBE_RXDADV_ERR_LE | \ + IXGBE_RXDADV_ERR_PE | \ + IXGBE_RXDADV_ERR_OSE | \ + IXGBE_RXDADV_ERR_USE) + +#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599 IXGBE_RXDADV_ERR_RXE + +/* Multicast bit mask */ +#define IXGBE_MCSTCTRL_MFE 0x4 + +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define IXGBE_REQ_TX_BUFFER_GRANULARITY 1024 + +/* Vlan-specific macros */ +#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */ +#define IXGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */ +#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */ +#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT + +/* SR-IOV specific macros */ +#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4) +#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4)) +#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600)) +#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4)) +/* Translated register #defines */ +#define IXGBE_PVFCTRL(P) (0x00300 + (4 * (P))) +#define IXGBE_PVFSTATUS(P) (0x00008 + (0 * (P))) +#define IXGBE_PVFLINKS(P) (0x042A4 + (0 * (P))) +#define IXGBE_PVFRTIMER(P) (0x00048 + (0 * (P))) +#define IXGBE_PVFMAILBOX(P) (0x04C00 + (4 * (P))) +#define IXGBE_PVFRXMEMWRAP(P) (0x03190 + (0 * (P))) +#define IXGBE_PVTEICR(P) (0x00B00 + (4 * (P))) +#define IXGBE_PVTEICS(P) (0x00C00 + (4 * (P))) +#define IXGBE_PVTEIMS(P) (0x00D00 + (4 * (P))) +#define IXGBE_PVTEIMC(P) (0x00E00 + (4 * (P))) +#define IXGBE_PVTEIAC(P) (0x00F00 + (4 * (P))) +#define IXGBE_PVTEIAM(P) (0x04D00 + (4 * (P))) +#define IXGBE_PVTEITR(P) (((P) < 24) ? (0x00820 + ((P) * 4)) : \ + (0x012300 + (((P) - 24) * 4))) +#define IXGBE_PVTIVAR(P) (0x12500 + (4 * (P))) +#define IXGBE_PVTIVAR_MISC(P) (0x04E00 + (4 * (P))) +#define IXGBE_PVTRSCINT(P) (0x12000 + (4 * (P))) +#define IXGBE_VFPBACL(P) (0x110C8 + (4 * (P))) +#define IXGBE_PVFRDBAL(P) ((P < 64) ? (0x01000 + (0x40 * (P))) \ + : (0x0D000 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDBAH(P) ((P < 64) ? (0x01004 + (0x40 * (P))) \ + : (0x0D004 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDLEN(P) ((P < 64) ? (0x01008 + (0x40 * (P))) \ + : (0x0D008 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDH(P) ((P < 64) ? (0x01010 + (0x40 * (P))) \ + : (0x0D010 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDT(P) ((P < 64) ? (0x01018 + (0x40 * (P))) \ + : (0x0D018 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRXDCTL(P) ((P < 64) ? (0x01028 + (0x40 * (P))) \ + : (0x0D028 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFSRRCTL(P) ((P < 64) ? (0x01014 + (0x40 * (P))) \ + : (0x0D014 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFPSRTYPE(P) (0x0EA00 + (4 * (P))) +#define IXGBE_PVFTDBAL(P) (0x06000 + (0x40 * (P))) +#define IXGBE_PVFTDBAH(P) (0x06004 + (0x40 * (P))) +#define IXGBE_PVFTTDLEN(P) (0x06008 + (0x40 * (P))) +#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) +#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) +#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) +#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) +#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) +#define IXGBE_PVFDCA_RXCTRL(P) (((P) < 64) ? (0x0100C + (0x40 * (P))) \ + : (0x0D00C + (0x40 * ((P) - 64)))) +#define IXGBE_PVFDCA_TXCTRL(P) (0x0600C + (0x40 * (P))) +#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x))) +#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x))) +#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x))) +#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x))) +#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x))) +#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x))) +#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x))) + +#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \ + (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index))) +#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \ + (IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index))) + +/* Little Endian defines */ +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 + +#endif +#ifndef __be16 +/* Big Endian defines */ +#define __be16 u16 +#define __be32 u32 +#define __be64 u64 + +#endif +enum ixgbe_fdir_pballoc_type { + IXGBE_FDIR_PBALLOC_NONE = 0, + IXGBE_FDIR_PBALLOC_64K = 1, + IXGBE_FDIR_PBALLOC_128K = 2, + IXGBE_FDIR_PBALLOC_256K = 3, +}; + +/* Flow Director register values */ +#define IXGBE_FDIRCTRL_PBALLOC_64K 0x00000001 +#define IXGBE_FDIRCTRL_PBALLOC_128K 0x00000002 +#define IXGBE_FDIRCTRL_PBALLOC_256K 0x00000003 +#define IXGBE_FDIRCTRL_INIT_DONE 0x00000008 +#define IXGBE_FDIRCTRL_PERFECT_MATCH 0x00000010 +#define IXGBE_FDIRCTRL_REPORT_STATUS 0x00000020 +#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080 +#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8 +#define IXGBE_FDIRCTRL_FLEX_SHIFT 16 +#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000 +#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24 +#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000 +#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT 28 + +#define IXGBE_FDIRTCPM_DPORTM_SHIFT 16 +#define IXGBE_FDIRUDPM_DPORTM_SHIFT 16 +#define IXGBE_FDIRIP6M_DIPM_SHIFT 16 +#define IXGBE_FDIRM_VLANID 0x00000001 +#define IXGBE_FDIRM_VLANP 0x00000002 +#define IXGBE_FDIRM_POOL 0x00000004 +#define IXGBE_FDIRM_L4P 0x00000008 +#define IXGBE_FDIRM_FLEX 0x00000010 +#define IXGBE_FDIRM_DIPv6 0x00000020 + +#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF +#define IXGBE_FDIRFREE_FREE_SHIFT 0 +#define IXGBE_FDIRFREE_COLL_MASK 0x7FFF0000 +#define IXGBE_FDIRFREE_COLL_SHIFT 16 +#define IXGBE_FDIRLEN_MAXLEN_MASK 0x3F +#define IXGBE_FDIRLEN_MAXLEN_SHIFT 0 +#define IXGBE_FDIRLEN_MAXHASH_MASK 0x7FFF0000 +#define IXGBE_FDIRLEN_MAXHASH_SHIFT 16 +#define IXGBE_FDIRUSTAT_ADD_MASK 0xFFFF +#define IXGBE_FDIRUSTAT_ADD_SHIFT 0 +#define IXGBE_FDIRUSTAT_REMOVE_MASK 0xFFFF0000 +#define IXGBE_FDIRUSTAT_REMOVE_SHIFT 16 +#define IXGBE_FDIRFSTAT_FADD_MASK 0x00FF +#define IXGBE_FDIRFSTAT_FADD_SHIFT 0 +#define IXGBE_FDIRFSTAT_FREMOVE_MASK 0xFF00 +#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT 8 +#define IXGBE_FDIRPORT_DESTINATION_SHIFT 16 +#define IXGBE_FDIRVLAN_FLEX_SHIFT 16 +#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15 +#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16 + +#define IXGBE_FDIRCMD_CMD_MASK 0x00000003 +#define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001 +#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002 +#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003 +#define IXGBE_FDIRCMD_FILTER_VALID 0x00000004 +#define IXGBE_FDIRCMD_FILTER_UPDATE 0x00000008 +#define IXGBE_FDIRCMD_IPv6DMATCH 0x00000010 +#define IXGBE_FDIRCMD_L4TYPE_UDP 0x00000020 +#define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040 +#define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060 +#define IXGBE_FDIRCMD_IPV6 0x00000080 +#define IXGBE_FDIRCMD_CLEARHT 0x00000100 +#define IXGBE_FDIRCMD_DROP 0x00000200 +#define IXGBE_FDIRCMD_INT 0x00000400 +#define IXGBE_FDIRCMD_LAST 0x00000800 +#define IXGBE_FDIRCMD_COLLISION 0x00001000 +#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000 +#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5 +#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16 +#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24 +#define IXGBE_FDIR_INIT_DONE_POLL 10 +#define IXGBE_FDIRCMD_CMD_POLL 10 + +#define IXGBE_FDIR_DROP_QUEUE 127 + +#define IXGBE_STATUS_OVERHEATING_BIT 20 /* STATUS overtemp bit num */ + +/* Manageablility Host Interface defines */ +#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ +#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ +#define IXGBE_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */ + +/* CEM Support */ +#define FW_CEM_HDR_LEN 0x4 +#define FW_CEM_CMD_DRIVER_INFO 0xDD +#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5 +#define FW_CEM_CMD_RESERVED 0X0 +#define FW_CEM_UNUSED_VER 0x0 +#define FW_CEM_MAX_RETRIES 3 +#define FW_CEM_RESP_STATUS_SUCCESS 0x1 + +/* Host Interface Command Structures */ + +struct ixgbe_hic_hdr { + u8 cmd; + u8 buf_len; + union { + u8 cmd_resv; + u8 ret_status; + } cmd_or_resp; + u8 checksum; +}; + +struct ixgbe_hic_drv_info { + struct ixgbe_hic_hdr hdr; + u8 port_num; + u8 ver_sub; + u8 ver_build; + u8 ver_min; + u8 ver_maj; + u8 pad; /* end spacing to ensure length is mult. of dword */ + u16 pad2; /* end spacing to ensure length is mult. of dword2 */ +}; + +/* Transmit Descriptor - Legacy */ +struct ixgbe_legacy_tx_desc { + u64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 cso; /* Checksum offset */ + u8 cmd; /* Descriptor control */ + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 css; /* Checksum start */ + __le16 vlan; + } fields; + } upper; +}; + +/* Transmit Descriptor - Advanced */ +union ixgbe_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Receive Descriptor - Legacy */ +struct ixgbe_legacy_rx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + __le16 length; /* Length of data DMAed into data buffer */ + __le16 csum; /* Packet checksum */ + u8 status; /* Descriptor status */ + u8 errors; /* Descriptor Errors */ + __le16 vlan; +}; + +/* Receive Descriptor - Advanced */ +union ixgbe_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /* RSS, Pkt type */ + __le16 hdr_info; /* Splithdr, hdrlen */ + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +/* Context descriptors */ +struct ixgbe_adv_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */ +#define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */ +#define IXGBE_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 time stamp */ +#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */ +#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */ +#define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */ +#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Adv Context Desc */ +#define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Adv Data Descriptor */ +#define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */ +#define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */ +#define IXGBE_ADVTXD_DCMD_RS IXGBE_TXD_CMD_RS /* Report Status */ +#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ +#define IXGBE_ADVTXD_DCMD_DEXT IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */ +#define IXGBE_ADVTXD_DCMD_VLE IXGBE_TXD_CMD_VLE /* VLAN pkt enable */ +#define IXGBE_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IXGBE_ADVTXD_STAT_DD IXGBE_TXD_STAT_DD /* Descriptor Done */ +#define IXGBE_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED pres in WB */ +#define IXGBE_ADVTXD_STAT_RSV 0x0000000C /* STA Reserved */ +#define IXGBE_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ +#define IXGBE_ADVTXD_CC 0x00000080 /* Check Context */ +#define IXGBE_ADVTXD_POPTS_SHIFT 8 /* Adv desc POPTS shift */ +#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \ + IXGBE_ADVTXD_POPTS_SHIFT) +#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \ + IXGBE_ADVTXD_POPTS_SHIFT) +#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ +/* 1st&Last TSO-full iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 +#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */ +#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ +#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ +#define IXGBE_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ +#define IXGBE_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ +#define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ +#define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ +#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */ +#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ +#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ +#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */ +#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */ +#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */ +#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */ +#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */ +#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */ +#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */ +#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */ +#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */ +#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */ +#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */ +#define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + +/* Autonegotiation advertised speeds */ +typedef u32 ixgbe_autoneg_advertised; +/* Link speed */ +typedef u32 ixgbe_link_speed; +#define IXGBE_LINK_SPEED_UNKNOWN 0 +#define IXGBE_LINK_SPEED_100_FULL 0x0008 +#define IXGBE_LINK_SPEED_1GB_FULL 0x0020 +#define IXGBE_LINK_SPEED_10GB_FULL 0x0080 +#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \ + IXGBE_LINK_SPEED_10GB_FULL) +#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \ + IXGBE_LINK_SPEED_1GB_FULL | \ + IXGBE_LINK_SPEED_10GB_FULL) + + +/* Physical layer type */ +typedef u32 ixgbe_physical_layer; +#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0 +#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x0001 +#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x0002 +#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x0004 +#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x0010 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x0020 +#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x0040 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x0080 +#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x0100 +#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200 +#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800 +#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 +#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 +#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000 + +/* Flow Control Data Sheet defined values + * Calculation and defines taken from 802.1bb Annex O + */ + +/* BitTimes (BT) conversion */ +#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024)) +#define IXGBE_B2BT(BT) (BT * 8) + +/* Calculate Delay to respond to PFC */ +#define IXGBE_PFC_D 672 + +/* Calculate Cable Delay */ +#define IXGBE_CABLE_DC 5556 /* Delay Copper */ +#define IXGBE_CABLE_DO 5000 /* Delay Optical */ + +/* Calculate Interface Delay X540 */ +#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */ +#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */ +#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */ + +#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC) + +/* Calculate Interface Delay 82598, 82599 */ +#define IXGBE_PHY_D 12800 +#define IXGBE_MAC_D 4096 +#define IXGBE_XAUI_D (2 * 1024) + +#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D) + +/* Calculate Delay incurred from higher layer */ +#define IXGBE_HD 6144 + +/* Calculate PCI Bus delay for low thresholds */ +#define IXGBE_PCI_DELAY 10000 + +/* Calculate X540 delay value in bit times */ +#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID_X540) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) + +/* Calculate 82599, 82598 delay value in bit times */ +#define IXGBE_DV(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) + +/* Calculate low threshold delay values */ +#define IXGBE_LOW_DV_X540(_max_frame_tc) \ + (2 * IXGBE_B2BT(_max_frame_tc) + \ + (36 * IXGBE_PCI_DELAY / 25) + 1) +#define IXGBE_LOW_DV(_max_frame_tc) \ + (2 * IXGBE_LOW_DV_X540(_max_frame_tc)) + +/* Software ATR hash keys */ +#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 +#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614 + +/* Software ATR input stream values and masks */ +#define IXGBE_ATR_HASH_MASK 0x7fff +#define IXGBE_ATR_L4TYPE_MASK 0x3 +#define IXGBE_ATR_L4TYPE_UDP 0x1 +#define IXGBE_ATR_L4TYPE_TCP 0x2 +#define IXGBE_ATR_L4TYPE_SCTP 0x3 +#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4 +enum ixgbe_atr_flow_type { + IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0, + IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1, + IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2, + IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3, + IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4, + IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5, + IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6, + IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7, +}; + +/* Flow Director ATR input struct. */ +union ixgbe_atr_input { + /* + * Byte layout in order, all values with MSB first: + * + * vm_pool - 1 byte + * flow_type - 1 byte + * vlan_id - 2 bytes + * src_ip - 16 bytes + * dst_ip - 16 bytes + * src_port - 2 bytes + * dst_port - 2 bytes + * flex_bytes - 2 bytes + * bkt_hash - 2 bytes + */ + struct { + u8 vm_pool; + u8 flow_type; + __be16 vlan_id; + __be32 dst_ip[4]; + __be32 src_ip[4]; + __be16 src_port; + __be16 dst_port; + __be16 flex_bytes; + __be16 bkt_hash; + } formatted; + __be32 dword_stream[11]; +}; + +/* Flow Director compressed ATR hash input struct */ +union ixgbe_atr_hash_dword { + struct { + u8 vm_pool; + u8 flow_type; + __be16 vlan_id; + } formatted; + __be32 ip; + struct { + __be16 src; + __be16 dst; + } port; + __be16 flex_bytes; + __be32 dword; +}; + + +/* + * Unavailable: The FCoE Boot Option ROM is not present in the flash. + * Disabled: Present; boot order is not set for any targets on the port. + * Enabled: Present; boot order is set for at least one target on the port. + */ +enum ixgbe_fcoe_boot_status { + ixgbe_fcoe_bootstatus_disabled = 0, + ixgbe_fcoe_bootstatus_enabled = 1, + ixgbe_fcoe_bootstatus_unavailable = 0xFFFF +}; + +enum ixgbe_eeprom_type { + ixgbe_eeprom_uninitialized = 0, + ixgbe_eeprom_spi, + ixgbe_flash, + ixgbe_eeprom_none /* No NVM support */ +}; + +enum ixgbe_mac_type { + ixgbe_mac_unknown = 0, + ixgbe_mac_82598EB, + ixgbe_mac_82599EB, + ixgbe_mac_X540, + ixgbe_num_macs +}; + +enum ixgbe_phy_type { + ixgbe_phy_unknown = 0, + ixgbe_phy_none, + ixgbe_phy_tn, + ixgbe_phy_aq, + ixgbe_phy_cu_unknown, + ixgbe_phy_qt, + ixgbe_phy_xaui, + ixgbe_phy_nl, + ixgbe_phy_sfp_passive_tyco, + ixgbe_phy_sfp_passive_unknown, + ixgbe_phy_sfp_active_unknown, + ixgbe_phy_sfp_avago, + ixgbe_phy_sfp_ftl, + ixgbe_phy_sfp_ftl_active, + ixgbe_phy_sfp_unknown, + ixgbe_phy_sfp_intel, + ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ + ixgbe_phy_generic +}; + +/* + * SFP+ module type IDs: + * + * ID Module Type + * ============= + * 0 SFP_DA_CU + * 1 SFP_SR + * 2 SFP_LR + * 3 SFP_DA_CU_CORE0 - 82599-specific + * 4 SFP_DA_CU_CORE1 - 82599-specific + * 5 SFP_SR/LR_CORE0 - 82599-specific + * 6 SFP_SR/LR_CORE1 - 82599-specific + */ +enum ixgbe_sfp_type { + ixgbe_sfp_type_da_cu = 0, + ixgbe_sfp_type_sr = 1, + ixgbe_sfp_type_lr = 2, + ixgbe_sfp_type_da_cu_core0 = 3, + ixgbe_sfp_type_da_cu_core1 = 4, + ixgbe_sfp_type_srlr_core0 = 5, + ixgbe_sfp_type_srlr_core1 = 6, + ixgbe_sfp_type_da_act_lmt_core0 = 7, + ixgbe_sfp_type_da_act_lmt_core1 = 8, + ixgbe_sfp_type_1g_cu_core0 = 9, + ixgbe_sfp_type_1g_cu_core1 = 10, + ixgbe_sfp_type_1g_sx_core0 = 11, + ixgbe_sfp_type_1g_sx_core1 = 12, + ixgbe_sfp_type_not_present = 0xFFFE, + ixgbe_sfp_type_unknown = 0xFFFF +}; + +enum ixgbe_media_type { + ixgbe_media_type_unknown = 0, + ixgbe_media_type_fiber, + ixgbe_media_type_fiber_qsfp, + ixgbe_media_type_fiber_lco, + ixgbe_media_type_copper, + ixgbe_media_type_backplane, + ixgbe_media_type_cx4, + ixgbe_media_type_virtual +}; + +/* Flow Control Settings */ +enum ixgbe_fc_mode { + ixgbe_fc_none = 0, + ixgbe_fc_rx_pause, + ixgbe_fc_tx_pause, + ixgbe_fc_full, + ixgbe_fc_default +}; + +/* Smart Speed Settings */ +#define IXGBE_SMARTSPEED_MAX_RETRIES 3 +enum ixgbe_smart_speed { + ixgbe_smart_speed_auto = 0, + ixgbe_smart_speed_on, + ixgbe_smart_speed_off +}; + +/* PCI bus types */ +enum ixgbe_bus_type { + ixgbe_bus_type_unknown = 0, + ixgbe_bus_type_pci, + ixgbe_bus_type_pcix, + ixgbe_bus_type_pci_express, + ixgbe_bus_type_reserved +}; + +/* PCI bus speeds */ +enum ixgbe_bus_speed { + ixgbe_bus_speed_unknown = 0, + ixgbe_bus_speed_33 = 33, + ixgbe_bus_speed_66 = 66, + ixgbe_bus_speed_100 = 100, + ixgbe_bus_speed_120 = 120, + ixgbe_bus_speed_133 = 133, + ixgbe_bus_speed_2500 = 2500, + ixgbe_bus_speed_5000 = 5000, + ixgbe_bus_speed_8000 = 8000, + ixgbe_bus_speed_reserved +}; + +/* PCI bus widths */ +enum ixgbe_bus_width { + ixgbe_bus_width_unknown = 0, + ixgbe_bus_width_pcie_x1 = 1, + ixgbe_bus_width_pcie_x2 = 2, + ixgbe_bus_width_pcie_x4 = 4, + ixgbe_bus_width_pcie_x8 = 8, + ixgbe_bus_width_32 = 32, + ixgbe_bus_width_64 = 64, + ixgbe_bus_width_reserved +}; + +struct ixgbe_addr_filter_info { + u32 num_mc_addrs; + u32 rar_used_count; + u32 mta_in_use; + u32 overflow_promisc; + bool user_set_promisc; +}; + +/* Bus parameters */ +struct ixgbe_bus_info { + enum ixgbe_bus_speed speed; + enum ixgbe_bus_width width; + enum ixgbe_bus_type type; + + u16 func; + u16 lan_id; +}; + +/* Flow control parameters */ +struct ixgbe_fc_info { + u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl High-water */ + u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */ + u16 pause_time; /* Flow Control Pause timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + bool disable_fc_autoneg; /* Do not autonegotiate FC */ + bool fc_was_autonegged; /* Is current_mode the result of autonegging? */ + enum ixgbe_fc_mode current_mode; /* FC mode in effect */ + enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */ +}; + +/* Statistics counters collected by the MAC */ +struct ixgbe_hw_stats { + u64 crcerrs; + u64 illerrc; + u64 errbc; + u64 mspdc; + u64 mpctotal; + u64 mpc[8]; + u64 mlfc; + u64 mrfc; + u64 rlec; + u64 lxontxc; + u64 lxonrxc; + u64 lxofftxc; + u64 lxoffrxc; + u64 pxontxc[8]; + u64 pxonrxc[8]; + u64 pxofftxc[8]; + u64 pxoffrxc[8]; + u64 prc64; + u64 prc127; + u64 prc255; + u64 prc511; + u64 prc1023; + u64 prc1522; + u64 gprc; + u64 bprc; + u64 mprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 rnbc[8]; + u64 ruc; + u64 rfc; + u64 roc; + u64 rjc; + u64 mngprc; + u64 mngpdc; + u64 mngptc; + u64 tor; + u64 tpr; + u64 tpt; + u64 ptc64; + u64 ptc127; + u64 ptc255; + u64 ptc511; + u64 ptc1023; + u64 ptc1522; + u64 mptc; + u64 bptc; + u64 xec; + u64 qprc[16]; + u64 qptc[16]; + u64 qbrc[16]; + u64 qbtc[16]; + u64 qprdc[16]; + u64 pxon2offc[8]; + u64 fdirustat_add; + u64 fdirustat_remove; + u64 fdirfstat_fadd; + u64 fdirfstat_fremove; + u64 fdirmatch; + u64 fdirmiss; + u64 fccrc; + u64 fclast; + u64 fcoerpdc; + u64 fcoeprc; + u64 fcoeptc; + u64 fcoedwrc; + u64 fcoedwtc; + u64 fcoe_noddp; + u64 fcoe_noddp_ext_buff; + u64 ldpcec; + u64 pcrc8ec; + u64 b2ospc; + u64 b2ogprc; + u64 o2bgptc; + u64 o2bspc; +}; + +/* forward declaration */ +struct ixgbe_hw; + +/* iterator type for walking multicast address lists */ +typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, + u32 *vmdq); + +/* Function pointer table */ +struct ixgbe_eeprom_operations { + s32 (*init_params)(struct ixgbe_hw *); + s32 (*read)(struct ixgbe_hw *, u16, u16 *); + s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *); + s32 (*write)(struct ixgbe_hw *, u16, u16); + s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *); + s32 (*validate_checksum)(struct ixgbe_hw *, u16 *); + s32 (*update_checksum)(struct ixgbe_hw *); + u16 (*calc_checksum)(struct ixgbe_hw *); +}; + +struct ixgbe_mac_operations { + s32 (*init_hw)(struct ixgbe_hw *); + s32 (*reset_hw)(struct ixgbe_hw *); + s32 (*start_hw)(struct ixgbe_hw *); + s32 (*clear_hw_cntrs)(struct ixgbe_hw *); + enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *); + u32 (*get_supported_physical_layer)(struct ixgbe_hw *); + s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*get_device_caps)(struct ixgbe_hw *, u16 *); + s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *); + s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *); + s32 (*stop_adapter)(struct ixgbe_hw *); + s32 (*get_bus_info)(struct ixgbe_hw *); + void (*set_lan_id)(struct ixgbe_hw *); + s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*); + s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8); + s32 (*setup_sfp)(struct ixgbe_hw *); + s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); + s32 (*disable_sec_rx_path)(struct ixgbe_hw *); + s32 (*enable_sec_rx_path)(struct ixgbe_hw *); + s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16); + void (*release_swfw_sync)(struct ixgbe_hw *, u16); + + /* Link */ + void (*disable_tx_laser)(struct ixgbe_hw *); + void (*enable_tx_laser)(struct ixgbe_hw *); + void (*flap_tx_laser)(struct ixgbe_hw *); + s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); + s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); + s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, + bool *); + + /* Packet Buffer manipulation */ + void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int); + + /* LED */ + s32 (*led_on)(struct ixgbe_hw *, u32); + s32 (*led_off)(struct ixgbe_hw *, u32); + s32 (*blink_led_start)(struct ixgbe_hw *, u32); + s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + + /* RAR, Multicast, VLAN */ + s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); + s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *); + s32 (*clear_rar)(struct ixgbe_hw *, u32); + s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32); + s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); + s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32); + s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); + s32 (*init_rx_addrs)(struct ixgbe_hw *); + s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32, + ixgbe_mc_addr_itr); + s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, + ixgbe_mc_addr_itr, bool clear); + s32 (*enable_mc)(struct ixgbe_hw *); + s32 (*disable_mc)(struct ixgbe_hw *); + s32 (*clear_vfta)(struct ixgbe_hw *); + s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool); + s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *); + s32 (*init_uta_tables)(struct ixgbe_hw *); + void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); + void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); + + /* Flow Control */ + s32 (*fc_enable)(struct ixgbe_hw *); + + /* Manageability interface */ + s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); + s32 (*get_thermal_sensor_data)(struct ixgbe_hw *); + s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); +}; + +struct ixgbe_phy_operations { + s32 (*identify)(struct ixgbe_hw *); + s32 (*identify_sfp)(struct ixgbe_hw *); + s32 (*init)(struct ixgbe_hw *); + s32 (*reset)(struct ixgbe_hw *); + s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *); + s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16); + s32 (*setup_link)(struct ixgbe_hw *); + s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool, + bool); + s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *); + s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *); + s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *); + s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8); + s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); + s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); + void (*i2c_bus_clear)(struct ixgbe_hw *); + s32 (*check_overtemp)(struct ixgbe_hw *); +}; + +struct ixgbe_eeprom_info { + struct ixgbe_eeprom_operations ops; + enum ixgbe_eeprom_type type; + u32 semaphore_delay; + u16 word_size; + u16 address_bits; + u16 word_page_size; +}; + +#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01 +struct ixgbe_mac_info { + struct ixgbe_mac_operations ops; + enum ixgbe_mac_type type; + u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + /* prefix for World Wide Node Name (WWNN) */ + u16 wwnn_prefix; + /* prefix for World Wide Port Name (WWPN) */ + u16 wwpn_prefix; +#define IXGBE_MAX_MTA 128 + u32 mta_shadow[IXGBE_MAX_MTA]; + s32 mc_filter_type; + u32 mcft_size; + u32 vft_size; + u32 num_rar_entries; + u32 rar_highwater; + u32 rx_pb_size; + u32 max_tx_queues; + u32 max_rx_queues; + u32 orig_autoc; + u8 san_mac_rar_index; + u32 orig_autoc2; + u16 max_msix_vectors; + bool arc_subsystem_valid; + bool orig_link_settings_stored; + bool autotry_restart; + u8 flags; + struct ixgbe_thermal_sensor_data thermal_sensor_data; +}; + +struct ixgbe_phy_info { + struct ixgbe_phy_operations ops; + enum ixgbe_phy_type type; + u32 addr; + u32 id; + enum ixgbe_sfp_type sfp_type; + bool sfp_setup_needed; + u32 revision; + enum ixgbe_media_type media_type; + bool reset_disable; + ixgbe_autoneg_advertised autoneg_advertised; + enum ixgbe_smart_speed smart_speed; + bool smart_speed_active; + bool multispeed_fiber; + bool reset_if_overtemp; + bool qsfp_shared_i2c_bus; +}; + +#include "ixgbe_mbx.h" + +struct ixgbe_mbx_operations { + void (*init_params)(struct ixgbe_hw *hw); + s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*check_for_msg)(struct ixgbe_hw *, u16); + s32 (*check_for_ack)(struct ixgbe_hw *, u16); + s32 (*check_for_rst)(struct ixgbe_hw *, u16); +}; + +struct ixgbe_mbx_stats { + u32 msgs_tx; + u32 msgs_rx; + + u32 acks; + u32 reqs; + u32 rsts; +}; + +struct ixgbe_mbx_info { + struct ixgbe_mbx_operations ops; + struct ixgbe_mbx_stats stats; + u32 timeout; + u32 udelay; + u32 v2p_mailbox; + u16 size; +}; + +struct ixgbe_hw { + u8 __iomem *hw_addr; + void *back; + struct ixgbe_mac_info mac; + struct ixgbe_addr_filter_info addr_ctrl; + struct ixgbe_fc_info fc; + struct ixgbe_phy_info phy; + struct ixgbe_eeprom_info eeprom; + struct ixgbe_bus_info bus; + struct ixgbe_mbx_info mbx; + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + bool adapter_stopped; + bool force_full_reset; + bool allow_unsupported_sfp; +}; + +#define ixgbe_call_func(hw, func, params, error) \ + (func != NULL) ? func params : error + + +/* Error Codes */ +#define IXGBE_ERR_EEPROM -1 +#define IXGBE_ERR_EEPROM_CHECKSUM -2 +#define IXGBE_ERR_PHY -3 +#define IXGBE_ERR_CONFIG -4 +#define IXGBE_ERR_PARAM -5 +#define IXGBE_ERR_MAC_TYPE -6 +#define IXGBE_ERR_UNKNOWN_PHY -7 +#define IXGBE_ERR_LINK_SETUP -8 +#define IXGBE_ERR_ADAPTER_STOPPED -9 +#define IXGBE_ERR_INVALID_MAC_ADDR -10 +#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 +#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12 +#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 +#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 +#define IXGBE_ERR_RESET_FAILED -15 +#define IXGBE_ERR_SWFW_SYNC -16 +#define IXGBE_ERR_PHY_ADDR_INVALID -17 +#define IXGBE_ERR_I2C -18 +#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 +#define IXGBE_ERR_SFP_NOT_PRESENT -20 +#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 +#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 +#define IXGBE_ERR_FDIR_REINIT_FAILED -23 +#define IXGBE_ERR_EEPROM_VERSION -24 +#define IXGBE_ERR_NO_SPACE -25 +#define IXGBE_ERR_OVERTEMP -26 +#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 +#define IXGBE_ERR_FC_NOT_SUPPORTED -28 +#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 +#define IXGBE_ERR_PBA_SECTION -31 +#define IXGBE_ERR_INVALID_ARGUMENT -32 +#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 +#define IXGBE_ERR_OUT_OF_MEM -34 + +#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF + +#define UNREFERENCED_XPARAMETER + +#endif /* _IXGBE_TYPE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c new file mode 100644 index 00000000..b99d9e84 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c @@ -0,0 +1,937 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_x540.h" +#include "ixgbe_type.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw); +static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw); +static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw); +static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw); + +/** + * ixgbe_init_ops_X540 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for X540. + * Does not touch the hardware. + **/ +s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val; + + ret_val = ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + + /* EEPROM */ + eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540; + eeprom->ops.read = &ixgbe_read_eerd_X540; + eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540; + eeprom->ops.write = &ixgbe_write_eewr_X540; + eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540; + eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540; + eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540; + eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540; + + /* PHY */ + phy->ops.init = &ixgbe_init_phy_ops_generic; + phy->ops.reset = NULL; + + /* MAC */ + mac->ops.reset_hw = &ixgbe_reset_hw_X540; + mac->ops.get_media_type = &ixgbe_get_media_type_X540; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_X540; + mac->ops.read_analog_reg8 = NULL; + mac->ops.write_analog_reg8 = NULL; + mac->ops.start_hw = &ixgbe_start_hw_X540; + mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; + mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; + mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; + mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; + mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; + mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540; + mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540; + mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; + mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; + mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; + mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; + mac->rar_highwater = 1; + mac->ops.set_vfta = &ixgbe_set_vfta_generic; + mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; + mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; + mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; + mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; + mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; + + /* Link */ + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + mac->ops.setup_link = &ixgbe_setup_mac_link_X540; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; + mac->ops.check_link = &ixgbe_check_mac_link_generic; + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 128; + mac->rx_pb_size = 384; + mac->max_tx_queues = 128; + mac->max_rx_queues = 128; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + /* + * FWSM register + * ARC supported; valid only if manageability features are + * enabled. + */ + mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & + IXGBE_FWSM_MODE_MASK) ? true : false; + + //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; + + /* LEDs */ + mac->ops.blink_led_start = ixgbe_blink_led_start_X540; + mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_X540 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: true when autoneg or autotry is enabled + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg); + + return 0; +} + +/** + * ixgbe_get_media_type_X540 - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw) +{ + return ixgbe_media_type_copper; +} + +/** + * ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + **/ +s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + return hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); +} + +/** + * ixgbe_reset_hw_X540 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, and perform a reset. + **/ +s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw) +{ + s32 status = 0; + + /* + * Userland DPDK takes the ownershiop of device + * Kernel driver here used as the simple path for ethtool only + * Won't real reset device anyway + */ +#if 0 + u32 ctrl, i; + + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* flush pending Tx transactions */ + ixgbe_clear_tx_pending(hw); + +mac_reset_top: + ctrl = IXGBE_CTRL_RST; + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + msleep(100); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* Set the Rx packet buffer size. */ + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT); + +#endif + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to 128, + * since we modify this value when programming the SAN MAC address. + */ + hw->mac.num_rar_entries = 128; + hw->mac.ops.init_rx_addrs(hw); + + /* Store the permanent SAN mac address */ + hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); + + /* Add the SAN MAC address to the RAR only if it's a valid address */ + if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { + hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* Save the SAN MAC RAR index */ + hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + + /* Reserve the last RAR for the SAN MAC address */ + hw->mac.num_rar_entries--; + } + + /* Store the alternative WWNN/WWPN prefix */ + hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, + &hw->mac.wwpn_prefix); + +//reset_hw_out: + return status; +} + +/** + * ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + if (ret_val != 0) + goto out; + + ret_val = ixgbe_start_hw_gen2(hw); + +out: + return ret_val; +} + +/** + * ixgbe_get_supported_physical_layer_X540 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u16 ext_ability = 0; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + + return physical_layer; +} + +/** + * ixgbe_init_eeprom_params_X540 - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 eec; + u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> + IXGBE_EEC_SIZE_SHIFT); + eeprom->word_size = 1 << (eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + + hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", + eeprom->type, eeprom->word_size); + } + + return 0; +} + +/** + * ixgbe_read_eerd_X540- Read EEPROM word using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_read_eerd_generic(hw, offset, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_read_eerd_buffer_X540- Read EEPROM word(s) using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of words + * @data: word(s) read from the EEPROM + * + * Reads a 16 bit word(s) from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, + u16 offset, u16 words, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_read_eerd_buffer_generic(hw, offset, + words, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_write_eewr_X540 - Write EEPROM word using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @data: word write to the EEPROM + * + * Write a 16 bit word to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_write_eewr_generic(hw, offset, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of words + * @data: word(s) write to the EEPROM + * + * Write a 16 bit word(s) to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, + u16 offset, u16 words, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_write_eewr_buffer_generic(hw, offset, + words, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum + * + * This function does not use synchronization for EERD and EEWR. It can + * be used internally by function which utilize ixgbe_acquire_swfw_sync_X540. + * + * @hw: pointer to hardware structure + **/ +u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) +{ + u16 i; + u16 j; + u16 checksum = 0; + u16 length = 0; + u16 pointer = 0; + u16 word = 0; + + /* + * Do not use hw->eeprom.ops.read because we do not want to take + * the synchronization semaphores here. Instead use + * ixgbe_read_eerd_generic + */ + + /* Include 0x0-0x3F in the checksum */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { + if (ixgbe_read_eerd_generic(hw, i, &word) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + + /* + * Include all data from pointers 0x3, 0x6-0xE. This excludes the + * FW, PHY module, and PCIe Expansion/Option ROM pointers. + */ + for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { + if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR) + continue; + + if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + + /* Skip pointer section if the pointer is invalid. */ + if (pointer == 0xFFFF || pointer == 0 || + pointer >= hw->eeprom.word_size) + continue; + + if (ixgbe_read_eerd_generic(hw, pointer, &length) != + 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + + /* Skip pointer section if length is invalid. */ + if (length == 0xFFFF || length == 0 || + (pointer + length) >= hw->eeprom.word_size) + continue; + + for (j = pointer+1; j <= pointer+length; j++) { + if (ixgbe_read_eerd_generic(hw, j, &word) != + 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + } + + checksum = (u16)IXGBE_EEPROM_SUM - checksum; + + return checksum; +} + +/** + * ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + **/ +s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, + u16 *checksum_val) +{ + s32 status; + u16 checksum; + u16 read_checksum = 0; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + goto out; + } + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + /* + * Do not use hw->eeprom.ops.read because we do not want to take + * the synchronization semaphores twice here. + */ + ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM, + &read_checksum); + + /* + * Verify read checksum from EEPROM is the same as + * calculated checksum + */ + if (read_checksum != checksum) + status = IXGBE_ERR_EEPROM_CHECKSUM; + + /* If the user cares, return the calculated checksum */ + if (checksum_val) + *checksum_val = checksum; + } else { + status = IXGBE_ERR_SWFW_SYNC; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); +out: + return status; +} + +/** + * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash + * @hw: pointer to hardware structure + * + * After writing EEPROM to shadow RAM using EEWR register, software calculates + * checksum and updates the EEPROM and instructs the hardware to update + * the flash. + **/ +s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) +{ + s32 status; + u16 checksum; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status != 0) + hw_dbg(hw, "EEPROM read failed\n"); + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + /* + * Do not use hw->eeprom.ops.write because we do not want to + * take the synchronization semaphores twice here. + */ + status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM, + checksum); + + if (status == 0) + status = ixgbe_update_flash_X540(hw); + else + status = IXGBE_ERR_SWFW_SYNC; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + + return status; +} + +/** + * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device + * @hw: pointer to hardware structure + * + * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy + * EEPROM from shadow RAM to the flash device. + **/ +static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) +{ + u32 flup; + s32 status = IXGBE_ERR_EEPROM; + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == IXGBE_ERR_EEPROM) { + hw_dbg(hw, "Flash update time out\n"); + goto out; + } + + flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP; + IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == 0) + hw_dbg(hw, "Flash update complete\n"); + else + hw_dbg(hw, "Flash update time out\n"); + + if (hw->revision_id == 0) { + flup = IXGBE_READ_REG(hw, IXGBE_EEC); + + if (flup & IXGBE_EEC_SEC1VAL) { + flup |= IXGBE_EEC_FLUP; + IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); + } + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == 0) + hw_dbg(hw, "Flash update complete\n"); + else + hw_dbg(hw, "Flash update time out\n"); + } +out: + return status; +} + +/** + * ixgbe_poll_flash_update_done_X540 - Poll flash update status + * @hw: pointer to hardware structure + * + * Polls the FLUDONE (bit 26) of the EEC Register to determine when the + * flash update is done. + **/ +static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) +{ + u32 i; + u32 reg; + s32 status = IXGBE_ERR_EEPROM; + + for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) { + reg = IXGBE_READ_REG(hw, IXGBE_EEC); + if (reg & IXGBE_EEC_FLUDONE) { + status = 0; + break; + } + udelay(5); + } + return status; +} + +/** + * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore thought the SW_FW_SYNC register for + * the specified function (CSR, PHY0, PHY1, NVM, Flash) + **/ +s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 5; + u32 hwmask = 0; + u32 timeout = 200; + u32 i; + s32 ret_val = 0; + + if (swmask == IXGBE_GSSR_EEP_SM) + hwmask = IXGBE_GSSR_FLASH_SM; + + /* SW only mask doesn't have FW bit pair */ + if (swmask == IXGBE_GSSR_SW_MNG_SM) + fwmask = 0; + + for (i = 0; i < timeout; i++) { + /* + * SW NVM semaphore bit is used for access to all + * SW_FW_SYNC bits (not just NVM) + */ + if (ixgbe_get_swfw_sync_semaphore(hw)) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (!(swfw_sync & (fwmask | swmask | hwmask))) { + swfw_sync |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + goto out; + } else { + /* + * Firmware currently using resource (fwmask), hardware + * currently using resource (hwmask), or other software + * thread currently using resource (swmask) + */ + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + } + } + + /* Failed to get SW only semaphore */ + if (swmask == IXGBE_GSSR_SW_MNG_SM) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + /* If the resource is not released by the FW/HW the SW can assume that + * the FW/HW malfunctions. In that case the SW should sets the SW bit(s) + * of the requested resource(s) while ignoring the corresponding FW/HW + * bits in the SW_FW_SYNC register. + */ + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (swfw_sync & (fwmask | hwmask)) { + if (ixgbe_get_swfw_sync_semaphore(hw)) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + } + +out: + return ret_val; +} + +/** + * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through the SW_FW_SYNC register + * for the specified function (CSR, PHY0, PHY1, EVM, Flash) + **/ +void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + + ixgbe_get_swfw_sync_semaphore(hw); + + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + swfw_sync &= ~swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); +} + +/** + * ixgbe_get_nvm_semaphore - Get hardware semaphore + * @hw: pointer to hardware structure + * + * Sets the hardware semaphores so SW/FW can gain control of shared resources + **/ +static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM; + u32 timeout = 2000; + u32 i; + u32 swsm; + + /* Get SMBI software semaphore between device drivers first */ + for (i = 0; i < timeout; i++) { + /* + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) { + status = 0; + break; + } + udelay(50); + } + + /* Now get the semaphore between SW/FW through the REGSMP bit */ + if (status == 0) { + for (i = 0; i < timeout; i++) { + swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (!(swsm & IXGBE_SWFW_REGSMP)) + break; + + udelay(50); + } + + /* + * Release semaphores and return error if SW NVM semaphore + * was not granted because we don't have access to the EEPROM + */ + if (i >= timeout) { + hw_dbg(hw, "REGSMP Software NVM semaphore not " + "granted.\n"); + ixgbe_release_swfw_sync_semaphore(hw); + status = IXGBE_ERR_EEPROM; + } + } else { + hw_dbg(hw, "Software semaphore SMBI between device drivers " + "not granted.\n"); + } + + return status; +} + +/** + * ixgbe_release_nvm_semaphore - Release hardware semaphore + * @hw: pointer to hardware structure + * + * This function clears hardware semaphore bits. + **/ +static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) +{ + u32 swsm; + + /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */ + + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + swsm &= ~IXGBE_SWSM_SMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + + swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + swsm &= ~IXGBE_SWFW_REGSMP; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm); + + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_blink_led_start_X540 - Blink LED based on index. + * @hw: pointer to hardware structure + * @index: led number to blink + * + * Devices that implement the version 2 interface: + * X540 + **/ +s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) +{ + u32 macc_reg; + u32 ledctl_reg; + ixgbe_link_speed speed; + bool link_up; + + /* + * Link should be up in order for the blink bit in the LED control + * register to work. Force link and speed in the MAC if link is down. + * This will be reversed when we stop the blinking. + */ + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (link_up == false) { + macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); + macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS; + IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); + } + /* Set the LED to LINK_UP + BLINK. */ + ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); + ledctl_reg |= IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index. + * @hw: pointer to hardware structure + * @index: led number to stop blinking + * + * Devices that implement the version 2 interface: + * X540 + **/ +s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) +{ + u32 macc_reg; + u32 ledctl_reg; + + /* Restore the LED to its default value. */ + ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); + ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); + ledctl_reg &= ~IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); + + /* Unforce link and speed in the MAC. */ + macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); + macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS); + IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h new file mode 100644 index 00000000..77e8952d --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h @@ -0,0 +1,58 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_X540_H_ +#define _IXGBE_X540_H_ + +#include "ixgbe_type.h" + +s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *autoneg); +enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw); +s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool link_up_wait_to_complete); +s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw); +u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw); + +s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw); +s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data); +s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data); +s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw); +s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val); +u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw); + +s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); + +s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index); +#endif /* _IXGBE_X540_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c new file mode 100644 index 00000000..5f2523ed --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c @@ -0,0 +1,1246 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include "kcompat.h" + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +/* From lib/vsprintf.c */ +#include + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define _kc_ZEROPAD 1 /* pad with zero */ +#define _kc_SIGN 2 /* unsigned/signed long */ +#define _kc_PLUS 4 /* show plus */ +#define _kc_SPACE 8 /* space if plus */ +#define _kc_LEFT 16 /* left justified */ +#define _kc_SPECIAL 32 /* 0x */ +#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & _kc_LARGE) ? large_digits : small_digits; + if (type & _kc_LEFT) + type &= ~_kc_ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & _kc_ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & _kc_SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & _kc_PLUS) { + sign = '+'; + size--; + } else if (type & _kc_SPACE) { + sign = ' '; + size--; + } + } + if (type & _kc_SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & _kc_SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & _kc_LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= _kc_LEFT; goto repeat; + case '+': flags |= _kc_PLUS; goto repeat; + case ' ': flags |= _kc_SPACE; goto repeat; + case '#': flags |= _kc_SPECIAL; goto repeat; + case '0': flags |= _kc_ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= _kc_LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & _kc_LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & _kc_LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= _kc_ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= _kc_LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= _kc_SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & _kc_SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & _kc_SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & _kc_SIGN) + num = (signed int) num; + } + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = _kc_vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} +#endif /* < 2.4.8 */ + + + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + unsigned int vfs_assigned = 0; +#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED + int pos; + struct pci_dev *vfdev; + unsigned short dev_id; + + /* only search if we are a PF */ + if (!dev->is_physfn) + return 0; + + /* find SR-IOV capability */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return 0; + + /* + * * determine the device ID for the VFs, the vendor ID will be the + * * same as the PF so there is no need to check for that one + * */ + pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); + + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(dev->vendor, dev_id, NULL); + while (vfdev) { + /* + * * It is considered assigned if it is a virtual function with + * * our dev as the physical function and the assigned bit is set + * */ + if (vfdev->is_virtfn && (vfdev->physfn == dev) && + (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) + vfs_assigned++; + + vfdev = pci_get_device(dev->vendor, dev_id, vfdev); + } + +#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ + return vfs_assigned; +} + +#endif /* CONFIG_PCI_IOV */ +#endif /* 3.10.0 */ + + + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#if defined(CONFIG_HIGHMEM) + +#ifndef PCI_DRAM_OFFSET +#define PCI_DRAM_OFFSET 0 +#endif + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return ((u64) (page - mem_map) << PAGE_SHIFT) + offset + + PCI_DRAM_OFFSET; +} + +#else /* CONFIG_HIGHMEM */ + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return pci_map_single(dev, (void *)page_address(page) + offset, size, + direction); +} + +#endif /* CONFIG_HIGHMEM */ + +void +_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, + int direction) +{ + return pci_unmap_single(dev, dma_addr, size, direction); +} + +#endif /* 2.4.13 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +int +_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +{ + if (!pci_dma_supported(dev, mask)) + return -EIO; + dev->dma_mask = mask; + return 0; +} + +int +_kc_pci_request_regions(struct pci_dev *dev, char *res_name) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) { + if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { + if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } + } + return 0; +} + +void +_kc_pci_release_regions(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) + release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + + else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) + release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + } +} + +/**************************************/ +/* NETWORK DRIVER API */ + +struct net_device * +_kc_alloc_etherdev(int sizeof_priv) +{ + struct net_device *dev; + int alloc_size; + + alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; + dev = kzalloc(alloc_size, GFP_KERNEL); + if (!dev) + return NULL; + + if (sizeof_priv) + dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); + dev->name[0] = '\0'; + ether_setup(dev); + + return dev; +} + +int +_kc_is_valid_ether_addr(u8 *addr) +{ + const char zaddr[6] = { 0, }; + + return !(addr[0] & 1) && memcmp(addr, zaddr, 6); +} + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +int +_kc_pci_set_power_state(struct pci_dev *dev, int state) +{ + return 0; +} + +int +_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) +{ + return 0; +} + +#endif /* 2.4.6 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + frag->page = page; + frag->page_offset = off; + frag->size = size; + skb_shinfo(skb)->nr_frags = i + 1; +} + +/* + * Original Copyright: + * find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + ffs(tmp); +} + +size_t _kc_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#endif /* 2.6.0 => 2.4.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + return (i >= size) ? (size - 1) : i; +} +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +char *_kc_kstrdup(const char *s, unsigned int gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +void *_kc_kzalloc(size_t size, int flags) +{ + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; +} +#endif /* <= 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) +int _kc_skb_pad(struct sk_buff *skb, int pad) +{ + int ntail; + + /* If the skbuff is non linear tailroom is always zero.. */ + if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { + memset(skb->data+skb->len, 0, pad); + return 0; + } + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); + goto free_skb; + } + +#ifdef MAX_SKB_FRAGS + if (skb_is_nonlinear(skb) && + !__pskb_pull_tail(skb, skb->data_len)) + goto free_skb; + +#endif + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: + kfree_skb(skb); + return -ENOMEM; +} + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +int _kc_pci_save_state(struct pci_dev *pdev) +{ + struct adapter_struct *adapter = pci_get_drvdata(pdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset, pcie_link_status; + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) + /* no ->dev for 2.4 kernels */ + WARN_ON(pdev->dev.driver_data == NULL); +#endif + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset) { + if (!pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + } + pci_config_space_ich8lan(); +#ifdef HAVE_PCI_ERS + if (adapter->config_space == NULL) +#else + WARN_ON(adapter->config_space != NULL); +#endif + adapter->config_space = kmalloc(size, GFP_KERNEL); + if (!adapter->config_space) { + printk(KERN_ERR "Out of memory in pci_save_state\n"); + return -ENOMEM; + } + for (i = 0; i < (size / 4); i++) + pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); + return 0; +} + +void _kc_pci_restore_state(struct pci_dev *pdev) +{ + struct adapter_struct *adapter = pci_get_drvdata(pdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset; + u16 pcie_link_status; + + if (adapter->config_space != NULL) { + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset && + !pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + + pci_config_space_ich8lan(); + for (i = 0; i < (size / 4); i++) + pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); +#ifndef HAVE_PCI_ERS + kfree(adapter->config_space); + adapter->config_space = NULL; +#endif + } +} +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +void _kc_free_netdev(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + + if (adapter->config_space != NULL) + kfree(adapter->config_space); +#ifdef CONFIG_SYSFS + if (netdev->reg_state == NETREG_UNINITIALIZED) { + kfree((char *)netdev - netdev->padded); + } else { + BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); + netdev->reg_state = NETREG_RELEASED; + class_device_put(&netdev->class_dev); + } +#else + kfree((char *)netdev - netdev->padded); +#endif +} +#endif + +void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) +{ + void *p; + + p = kzalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} +#endif /* <= 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +/* hexdump code taken from lib/hexdump.c */ +static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, + int groupsize, unsigned char *linebuf, + size_t linebuflen, bool ascii) +{ + const u8 *ptr = buf; + u8 ch; + int j, lx = 0; + int ascii_column; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + if (!len) + goto nil; + if (len > rowsize) /* limit to one line at a time */ + len = rowsize; + if ((len % groupsize) != 0) /* no mixed size output */ + groupsize = 1; + + switch (groupsize) { + case 8: { + const u64 *ptr8 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + ascii_column = 17 * ngroups + 2; + break; + } + + case 4: { + const u32 *ptr4 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%8.8x", j ? " " : "", *(ptr4 + j)); + ascii_column = 9 * ngroups + 2; + break; + } + + case 2: { + const u16 *ptr2 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%4.4x", j ? " " : "", *(ptr2 + j)); + ascii_column = 5 * ngroups + 2; + break; + } + + default: + for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = hex_asc(ch >> 4); + linebuf[lx++] = hex_asc(ch & 0x0f); + linebuf[lx++] = ' '; + } + if (j) + lx--; + + ascii_column = 3 * rowsize + 2; + break; + } + if (!ascii) + goto nil; + + while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + linebuf[lx++] = ' '; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) + linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] + : '.'; +nil: + linebuf[lx++] = '\0'; +} + +void _kc_print_hex_dump(const char *level, + const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + unsigned char linebuf[200]; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len; i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + linebuf, sizeof(linebuf), ascii); + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + printk("%s%s%*p: %s\n", level, prefix_str, + (int)(2 * sizeof(void *)), ptr + i, linebuf); + break; + case DUMP_PREFIX_OFFSET: + printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); + break; + default: + printk("%s%s%s\n", level, prefix_str, linebuf); + break; + } + } +} +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +int ixgbe_dcb_netlink_register(void) +{ + return 0; +} + +int ixgbe_dcb_netlink_unregister(void) +{ + return 0; +} + +int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max) +{ + return 0; +} +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifdef NAPI +struct net_device *napi_to_poll_dev(struct napi_struct *napi) +{ + struct adapter_q_vector *q_vector = container_of(napi, + struct adapter_q_vector, + napi); + return &q_vector->poll_dev; +} + +int __kc_adapter_clean(struct net_device *netdev, int *budget) +{ + int work_done; + int work_to_do = min(*budget, netdev->quota); + /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ + struct napi_struct *napi = netdev->priv; + work_done = napi->poll(napi, work_to_do); + *budget -= work_done; + netdev->quota -= work_done; + return (work_done >= work_to_do) ? 1 : 0; +} +#endif /* NAPI */ +#endif /* <= 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) +{ + struct pci_dev *parent = pdev->bus->self; + u16 link_state; + int pos; + + if (!parent) + return; + + pos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (pos) { + pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); + link_state &= ~state; + pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); + } +} +#endif /* < 2.6.26 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +#ifdef HAVE_TX_MQ +void _kc_netif_tx_stop_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_stop_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +} +void _kc_netif_tx_wake_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_wake_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +} +void _kc_netif_tx_start_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_start_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_start_subqueue(netdev, i); +} +#endif /* HAVE_TX_MQ */ + +#ifndef __WARN_printf +void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) +{ + va_list args; + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + dump_stack(); +} +#endif /* __WARN_printf */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) + +int +_kc_pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state; + int error; + + target_state = pci_choose_state(dev, PMSG_SUSPEND); + + pci_enable_wake(dev, target_state, true); + + error = pci_set_power_state(dev, target_state); + + if (error) + pci_enable_wake(dev, target_state, false); + + return error; +} + +int +_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) +{ + int err; + + err = pci_enable_wake(dev, PCI_D3cold, enable); + if (err) + goto out; + + err = pci_enable_wake(dev, PCI_D3hot, enable); + +out: + return err; +} +#endif /* < 2.6.28 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += size; +} +#endif /* < 3.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#ifdef HAVE_NETDEV_SELECT_QUEUE +#include +static u32 _kc_simple_tx_hashrnd; +static u32 _kc_simple_tx_hashrnd_initialized; + +u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 addr1, addr2, ports; + u32 hash, ihl; + u8 ip_proto = 0; + + if (unlikely(!_kc_simple_tx_hashrnd_initialized)) { + get_random_bytes(&_kc_simple_tx_hashrnd, 4); + _kc_simple_tx_hashrnd_initialized = 1; + } + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; + ihl = ip_hdr(skb)->ihl; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; +#endif + default: + return 0; + } + + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); + break; + + default: + ports = 0; + break; + } + + hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); +} +#endif /* HAVE_NETDEV_SELECT_QUEUE */ +#endif /* < 2.6.30 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifdef HAVE_TX_MQ +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + struct Qdisc *qdisc; + int i; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if ( txq < real_num) { + dev->real_num_tx_queues = txq; + for (i = txq; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } + } +} +#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ +#endif /* HAVE_TX_MQ */ +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +static const u32 _kc_flags_dup_features = + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); + +u32 _kc_ethtool_op_get_flags(struct net_device *dev) +{ + return dev->features & _kc_flags_dup_features; +} + +int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) +{ + if (data & ~supported) + return -EINVAL; + + dev->features = ((dev->features & ~_kc_flags_dup_features) | + (data & _kc_flags_dup_features)); + return 0; +} +#endif /* < 2.6.36 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) +u8 _kc_netdev_get_num_tc(struct net_device *dev) +{ + struct adapter_struct *kc_adapter = netdev_priv(dev); + if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED) + return kc_adapter->tc; + else + return 0; +} + +u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up) +{ + struct adapter_struct *kc_adapter = netdev_priv(dev); + int tc; + u8 map; + + for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) { + map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap; + + if (map & (1 << up)) + return tc; + } + + return 0; +} +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h new file mode 100644 index 00000000..bf27579b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -0,0 +1,3143 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _KCOMPAT_H_ +#define _KCOMPAT_H_ + +#ifndef LINUX_VERSION_CODE +#include +#else +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* NAPI enable/disable flags here */ +/* enable NAPI for ixgbe by default */ +#undef CONFIG_IXGBE_NAPI +#define CONFIG_IXGBE_NAPI +#define NAPI +#ifdef CONFIG_IXGBE_NAPI +#undef NAPI +#define NAPI +#endif /* CONFIG_IXGBE_NAPI */ +#ifdef IXGBE_NAPI +#undef NAPI +#define NAPI +#endif /* IXGBE_NAPI */ +#ifdef IXGBE_NO_NAPI +#undef NAPI +#endif /* IXGBE_NO_NAPI */ + +#define adapter_struct ixgbe_adapter +#define adapter_q_vector ixgbe_q_vector + +/* and finally set defines so that the code sees the changes */ +#ifdef NAPI +#ifndef CONFIG_IXGBE_NAPI +#define CONFIG_IXGBE_NAPI +#endif +#else +#undef CONFIG_IXGBE_NAPI +#endif /* NAPI */ + +/* packet split disable/enable */ +#ifdef DISABLE_PACKET_SPLIT +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#endif +#endif /* DISABLE_PACKET_SPLIT */ + +/* MSI compatibility code for all kernels and drivers */ +#ifdef DISABLE_PCI_MSI +#undef CONFIG_PCI_MSI +#endif +#ifndef CONFIG_PCI_MSI +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +struct msix_entry { + u16 vector; /* kernel uses to write allocated vector */ + u16 entry; /* driver uses to specify entry, OS writes */ +}; +#endif +#undef pci_enable_msi +#define pci_enable_msi(a) -ENOTSUPP +#undef pci_disable_msi +#define pci_disable_msi(a) do {} while (0) +#undef pci_enable_msix +#define pci_enable_msix(a, b, c) -ENOTSUPP +#undef pci_disable_msix +#define pci_disable_msix(a) do {} while (0) +#define msi_remove_pci_irq_vectors(a) do {} while (0) +#endif /* CONFIG_PCI_MSI */ +#ifdef DISABLE_PM +#undef CONFIG_PM +#endif + +#ifdef DISABLE_NET_POLL_CONTROLLER +#undef CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef PMSG_SUSPEND +#define PMSG_SUSPEND 3 +#endif + +/* generic boolean compatibility */ +#undef TRUE +#undef FALSE +#define TRUE true +#define FALSE false +#ifdef GCC_VERSION +#if ( GCC_VERSION < 3000 ) +#define _Bool char +#endif +#else +#define _Bool char +#endif + +/* kernels less than 2.4.14 don't have this */ +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef module_param +#define module_param(v,t,p) MODULE_PARM(v, "i"); +#endif + +#ifndef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#endif + +#ifndef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0x00000000ffffffffULL +#endif + +#ifndef PCI_CAP_ID_EXP +#define PCI_CAP_ID_EXP 0x10 +#endif + +#ifndef PCIE_LINK_STATE_L0S +#define PCIE_LINK_STATE_L0S 1 +#endif +#ifndef PCIE_LINK_STATE_L1 +#define PCIE_LINK_STATE_L1 2 +#endif + +#ifndef mmiowb +#ifdef CONFIG_IA64 +#define mmiowb() asm volatile ("mf.a" ::: "memory") +#else +#define mmiowb() +#endif +#endif + +#ifndef SET_NETDEV_DEV +#define SET_NETDEV_DEV(net, pdev) +#endif + +#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#define free_netdev(x) kfree(x) +#endif + +#ifdef HAVE_POLL_CONTROLLER +#define CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef SKB_DATAREF_SHIFT +/* if we do not have the infrastructure to detect if skb_header is cloned + just return false in all cases */ +#define skb_header_cloned(x) 0 +#endif + +#ifndef NETIF_F_GSO +#define gso_size tso_size +#define gso_segs tso_segs +#endif + +#ifndef NETIF_F_GRO +#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ + vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) +#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) +#endif + +#ifndef NETIF_F_SCTP_CSUM +#define NETIF_F_SCTP_CSUM 0 +#endif + +#ifndef NETIF_F_LRO +#define NETIF_F_LRO (1 << 15) +#endif + +#ifndef NETIF_F_NTUPLE +#define NETIF_F_NTUPLE (1 << 27) +#endif + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif + +#ifndef CHECKSUM_PARTIAL +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW +#endif + +#ifndef __read_mostly +#define __read_mostly +#endif + +#ifndef MII_RESV1 +#define MII_RESV1 0x17 /* Reserved... */ +#endif + +#ifndef unlikely +#define unlikely(_x) _x +#define likely(_x) _x +#endif + +#ifndef WARN_ON +#define WARN_ON(x) +#endif + +#ifndef PCI_DEVICE +#define PCI_DEVICE(vend,dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +#endif + +#ifndef node_online +#define node_online(node) ((node) == 0) +#endif + +#ifndef num_online_cpus +#define num_online_cpus() smp_num_cpus +#endif + +#ifndef cpu_online +#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) +#endif + +#ifndef _LINUX_RANDOM_H +#include +#endif + +#ifndef DECLARE_BITMAP +#ifndef BITS_TO_LONGS +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#endif +#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] +#endif + +#ifndef VLAN_HLEN +#define VLAN_HLEN 4 +#endif + +#ifndef VLAN_ETH_HLEN +#define VLAN_ETH_HLEN 18 +#endif + +#ifndef VLAN_ETH_FRAME_LEN +#define VLAN_ETH_FRAME_LEN 1518 +#endif + +#if !defined(IXGBE_DCA) && !defined(IGB_DCA) +#define dca_get_tag(b) 0 +#define dca_add_requester(a) -1 +#define dca_remove_requester(b) do { } while(0) +#define DCA_PROVIDER_ADD 0x0001 +#define DCA_PROVIDER_REMOVE 0x0002 +#endif + +#ifndef DCA_GET_TAG_TWO_ARGS +#define dca3_get_tag(a,b) dca_get_tag(b) +#endif + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#if defined(__i386__) || defined(__x86_64__) +#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#endif +#endif + +/* taken from 2.6.24 definition in linux/kernel.h */ +#ifndef IS_ALIGNED +#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) +#endif + +#ifndef NETIF_F_HW_VLAN_TX +struct _kc_vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_ethhdr _kc_vlan_ethhdr +struct _kc_vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_hdr _kc_vlan_hdr +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#define vlan_tx_tag_present(_skb) 0 +#define vlan_tx_tag_get(_skb) 0 +#endif +#endif + +#ifndef VLAN_PRIO_SHIFT +#define VLAN_PRIO_SHIFT 13 +#endif + + +#ifndef __GFP_COLD +#define __GFP_COLD 0 +#endif + +/*****************************************************************************/ +/* Installations with ethtool version without eeprom, adapter id, or statistics + * support */ + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x1d +#undef ethtool_drvinfo +#define ethtool_drvinfo k_ethtool_drvinfo +struct k_ethtool_drvinfo { + u32 cmd; + char driver[32]; + char version[32]; + char fw_version[32]; + char bus_info[32]; + char reserved1[32]; + char reserved2[16]; + u32 n_stats; + u32 testinfo_len; + u32 eedump_len; + u32 regdump_len; +}; + +struct ethtool_stats { + u32 cmd; + u32 n_stats; + u64 data[0]; +}; +#endif /* ETHTOOL_GSTATS */ + +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x1c +#endif /* ETHTOOL_PHYS_ID */ + +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x1b +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, +}; +struct ethtool_gstrings { + u32 cmd; /* ETHTOOL_GSTRINGS */ + u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ + u32 len; /* number of strings in the string set */ + u8 data[0]; +}; +#endif /* ETHTOOL_GSTRINGS */ + +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x1a +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), +}; +struct ethtool_test { + u32 cmd; + u32 flags; + u32 reserved; + u32 len; + u64 data[0]; +}; +#endif /* ETHTOOL_TEST */ + +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0xb +#undef ETHTOOL_GREGS +struct ethtool_eeprom { + u32 cmd; + u32 magic; + u32 offset; + u32 len; + u8 data[0]; +}; + +struct ethtool_value { + u32 cmd; + u32 data; +}; +#endif /* ETHTOOL_GEEPROM */ + +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0xa +#endif /* ETHTOOL_GLINK */ + +#ifndef ETHTOOL_GWOL +#define ETHTOOL_GWOL 0x5 +#define ETHTOOL_SWOL 0x6 +#define SOPASS_MAX 6 +struct ethtool_wolinfo { + u32 cmd; + u32 supported; + u32 wolopts; + u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ +}; +#endif /* ETHTOOL_GWOL */ + +#ifndef ETHTOOL_GREGS +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ +#define ethtool_regs _kc_ethtool_regs +/* for passing big chunks of data */ +struct _kc_ethtool_regs { + u32 cmd; + u32 version; /* driver-specific, indicates different chips/revs */ + u32 len; /* bytes */ + u8 data[0]; +}; +#endif /* ETHTOOL_GREGS */ + +#ifndef ETHTOOL_GMSGLVL +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#endif +#ifndef ETHTOOL_SMSGLVL +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ +#endif +#ifndef ETHTOOL_NWAY_RST +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ +#endif +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0x0000000a /* Get link status */ +#endif +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#endif +#ifndef ETHTOOL_SEEPROM +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ +#endif +#ifndef ETHTOOL_GCOALESCE +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +/* for configuring coalescing parameters of chip */ +#define ethtool_coalesce _kc_ethtool_coalesce +struct _kc_ethtool_coalesce { + u32 cmd; /* ETHTOOL_{G,S}COALESCE */ + + /* How many usecs to delay an RX interrupt after + * a packet arrives. If 0, only rx_max_coalesced_frames + * is used. + */ + u32 rx_coalesce_usecs; + + /* How many packets to delay an RX interrupt after + * a packet arrives. If 0, only rx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause RX interrupts to never be + * generated. + */ + u32 rx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 rx_coalesce_usecs_irq; + u32 rx_max_coalesced_frames_irq; + + /* How many usecs to delay a TX interrupt after + * a packet is sent. If 0, only tx_max_coalesced_frames + * is used. + */ + u32 tx_coalesce_usecs; + + /* How many packets to delay a TX interrupt after + * a packet is sent. If 0, only tx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause TX interrupts to never be + * generated. + */ + u32 tx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 tx_coalesce_usecs_irq; + u32 tx_max_coalesced_frames_irq; + + /* How many usecs to delay in-memory statistics + * block updates. Some drivers do not have an in-memory + * statistic block, and in such cases this value is ignored. + * This value must not be zero. + */ + u32 stats_block_coalesce_usecs; + + /* Adaptive RX/TX coalescing is an algorithm implemented by + * some drivers to improve latency under low packet rates and + * improve throughput under high packet rates. Some drivers + * only implement one of RX or TX adaptive coalescing. Anything + * not implemented by the driver causes these values to be + * silently ignored. + */ + u32 use_adaptive_rx_coalesce; + u32 use_adaptive_tx_coalesce; + + /* When the packet rate (measured in packets per second) + * is below pkt_rate_low, the {rx,tx}_*_low parameters are + * used. + */ + u32 pkt_rate_low; + u32 rx_coalesce_usecs_low; + u32 rx_max_coalesced_frames_low; + u32 tx_coalesce_usecs_low; + u32 tx_max_coalesced_frames_low; + + /* When the packet rate is below pkt_rate_high but above + * pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ + + /* When the packet rate is (measured in packets per second) + * is above pkt_rate_high, the {rx,tx}_*_high parameters are + * used. + */ + u32 pkt_rate_high; + u32 rx_coalesce_usecs_high; + u32 rx_max_coalesced_frames_high; + u32 tx_coalesce_usecs_high; + u32 tx_max_coalesced_frames_high; + + /* How often to do adaptive coalescing packet rate sampling, + * measured in seconds. Must not be zero. + */ + u32 rate_sample_interval; +}; +#endif /* ETHTOOL_GCOALESCE */ + +#ifndef ETHTOOL_SCOALESCE +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#endif +#ifndef ETHTOOL_GRINGPARAM +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +/* for configuring RX/TX ring parameters */ +#define ethtool_ringparam _kc_ethtool_ringparam +struct _kc_ethtool_ringparam { + u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ + + /* Read only attributes. These indicate the maximum number + * of pending RX/TX ring entries the driver will allow the + * user to set. + */ + u32 rx_max_pending; + u32 rx_mini_max_pending; + u32 rx_jumbo_max_pending; + u32 tx_max_pending; + + /* Values changeable by the user. The valid values are + * in the range 1 to the "*_max_pending" counterpart above. + */ + u32 rx_pending; + u32 rx_mini_pending; + u32 rx_jumbo_pending; + u32 tx_pending; +}; +#endif /* ETHTOOL_GRINGPARAM */ + +#ifndef ETHTOOL_SRINGPARAM +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ +#endif +#ifndef ETHTOOL_GPAUSEPARAM +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +/* for configuring link flow control parameters */ +#define ethtool_pauseparam _kc_ethtool_pauseparam +struct _kc_ethtool_pauseparam { + u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ + + /* If the link is being auto-negotiated (via ethtool_cmd.autoneg + * being true) the user may set 'autoneg' here non-zero to have the + * pause parameters be auto-negotiated too. In such a case, the + * {rx,tx}_pause values below determine what capabilities are + * advertised. + * + * If 'autoneg' is zero or the link is not being auto-negotiated, + * then {rx,tx}_pause force the driver to use/not-use pause + * flow control. + */ + u32 autoneg; + u32 rx_pause; + u32 tx_pause; +}; +#endif /* ETHTOOL_GPAUSEPARAM */ + +#ifndef ETHTOOL_SPAUSEPARAM +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#endif +#ifndef ETHTOOL_GRXCSUM +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_SRXCSUM +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GTXCSUM +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STXCSUM +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GSG +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#endif +#ifndef ETHTOOL_SSG +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#endif +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ +#endif +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#endif +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#endif +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#endif +#ifndef ETHTOOL_GTSO +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STSO +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#endif + +#ifndef ETHTOOL_BUSINFO_LEN +#define ETHTOOL_BUSINFO_LEN 32 +#endif + +#ifndef RHEL_RELEASE_CODE +/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ +#define RHEL_RELEASE_CODE 0 +#endif +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif +#ifndef AX_RELEASE_CODE +#define AX_RELEASE_CODE 0 +#endif +#ifndef AX_RELEASE_VERSION +#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif + +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) +#endif +#ifndef SLE_VERSION_CODE +#ifdef CONFIG_SUSE_KERNEL +/* SLES11 GA is 2.6.27 based */ +#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#else +#define SLE_VERSION_CODE 0 +#endif +#else /* CONFIG_SUSE_KERNEL */ +#define SLE_VERSION_CODE 0 +#endif /* CONFIG_SUSE_KERNEL */ +#endif /* SLE_VERSION_CODE */ + +#ifdef __KLOCWORK__ +#ifdef ARRAY_SIZE +#undef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif +#endif /* __KLOCWORK__ */ + +/*****************************************************************************/ +/* 2.4.3 => 2.4.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +#ifndef pci_set_dma_mask +#define pci_set_dma_mask _kc_pci_set_dma_mask +extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +#endif + +#ifndef pci_request_regions +#define pci_request_regions _kc_pci_request_regions +extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name); +#endif + +#ifndef pci_release_regions +#define pci_release_regions _kc_pci_release_regions +extern void _kc_pci_release_regions(struct pci_dev *pdev); +#endif + +/**************************************/ +/* NETWORK DRIVER API */ + +#ifndef alloc_etherdev +#define alloc_etherdev _kc_alloc_etherdev +extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); +#endif + +#ifndef is_valid_ether_addr +#define is_valid_ether_addr _kc_is_valid_ether_addr +extern int _kc_is_valid_ether_addr(u8 *addr); +#endif + +/**************************************/ +/* MISCELLANEOUS */ + +#ifndef INIT_TQUEUE +#define INIT_TQUEUE(_tq, _routine, _data) \ + do { \ + INIT_LIST_HEAD(&(_tq)->list); \ + (_tq)->sync = 0; \ + (_tq)->routine = _routine; \ + (_tq)->data = _data; \ + } while (0) +#endif + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) +/* Generic MII registers. */ +#define MII_BMCR 0x00 /* Basic mode control register */ +#define MII_BMSR 0x01 /* Basic mode status register */ +#define MII_PHYSID1 0x02 /* PHYS ID 1 */ +#define MII_PHYSID2 0x03 /* PHYS ID 2 */ +#define MII_ADVERTISE 0x04 /* Advertisement control reg */ +#define MII_LPA 0x05 /* Link partner ability reg */ +#define MII_EXPANSION 0x06 /* Expansion register */ +/* Basic mode control register. */ +#define BMCR_FULLDPLX 0x0100 /* Full duplex */ +#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ +/* Basic mode status register. */ +#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ +#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ +#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ +#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ +#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ +#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ +/* Advertisement control register. */ +#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ + ADVERTISE_100HALF | ADVERTISE_100FULL) +/* Expansion register for auto-negotiation. */ +#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ +#endif + +/*****************************************************************************/ +/* 2.4.6 => 2.4.3 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +#ifndef pci_set_power_state +#define pci_set_power_state _kc_pci_set_power_state +extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); +#endif + +#ifndef pci_enable_wake +#define pci_enable_wake _kc_pci_enable_wake +extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); +#endif + +#ifndef pci_disable_device +#define pci_disable_device _kc_pci_disable_device +extern void _kc_pci_disable_device(struct pci_dev *pdev); +#endif + +/* PCI PM entry point syntax changed, so don't support suspend/resume */ +#undef CONFIG_PM + +#endif /* 2.4.6 => 2.4.3 */ + +#ifndef HAVE_PCI_SET_MWI +#define pci_set_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ + PCI_COMMAND_INVALIDATE); +#define pci_clear_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ + ~PCI_COMMAND_INVALIDATE); +#endif + +/*****************************************************************************/ +/* 2.4.10 => 2.4.9 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) + +/**************************************/ +/* MODULE API */ + +#ifndef MODULE_LICENSE + #define MODULE_LICENSE(X) +#endif + +/**************************************/ +/* OTHER */ + +#undef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#undef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define min_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x < _y ? _x : _y; }) + +#define max_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x > _y ? _x : _y; }) + +#ifndef list_for_each_safe +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) +#endif + +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); +#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args) +extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) +#else /* 2.4.8 => 2.4.9 */ +extern int snprintf(char * buf, size_t size, const char *fmt, ...); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#endif +#endif /* 2.4.10 -> 2.4.6 */ + + +/*****************************************************************************/ +/* 2.4.12 => 2.4.10 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) +#ifndef HAVE_NETIF_MSG +#define HAVE_NETIF_MSG 1 +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, + NETIF_MSG_HW = 0x2000, + NETIF_MSG_WOL = 0x4000, +}; + +#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) +#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) +#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) +#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) +#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) +#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) +#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) +#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) +#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) +#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) +#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) +#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) +#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) +#endif /* !HAVE_NETIF_MSG */ +#endif /* 2.4.12 => 2.4.10 */ + +/*****************************************************************************/ +/* 2.4.13 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#ifndef virt_to_page + #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) +#endif + +#ifndef pci_map_page +#define pci_map_page _kc_pci_map_page +extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); +#endif + +#ifndef pci_unmap_page +#define pci_unmap_page _kc_pci_unmap_page +extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); +#endif + +/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ + +#undef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0xffffffff +#undef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffff + +/**************************************/ +/* OTHER */ + +#ifndef cpu_relax +#define cpu_relax() rep_nop() +#endif + +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_vlan_proto; + unsigned short h_vlan_TCI; + unsigned short h_vlan_encapsulated_proto; +}; +#endif /* 2.4.13 => 2.4.12 */ + +/*****************************************************************************/ +/* 2.4.17 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) + +#ifndef __devexit_p + #define __devexit_p(x) &(x) +#endif + +#endif /* 2.4.17 => 2.4.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) +#define NETIF_MSG_HW 0x2000 +#define NETIF_MSG_WOL 0x4000 + +#ifndef netif_msg_hw +#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) +#endif +#ifndef netif_msg_wol +#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) +#endif +#endif /* 2.4.18 */ + +/*****************************************************************************/ + +/*****************************************************************************/ +/* 2.4.20 => 2.4.19 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) + +/* we won't support NAPI on less than 2.4.20 */ +#ifdef NAPI +#undef NAPI +#undef CONFIG_IXGBE_NAPI +#endif + +#endif /* 2.4.20 => 2.4.19 */ + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#define pci_name(x) ((x)->slot_name) +#endif + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#ifndef IXGBE_NO_LRO +/* Don't enable LRO for these legacy kernels */ +#define IXGBE_NO_LRO +#endif +#endif + +/*****************************************************************************/ +/*****************************************************************************/ +/* 2.4.23 => 2.4.22 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) +/*****************************************************************************/ +#ifdef NAPI +#ifndef netif_poll_disable +#define netif_poll_disable(x) _kc_netif_poll_disable(x) +static inline void _kc_netif_poll_disable(struct net_device *netdev) +{ + while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { + /* No hurry */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } +} +#endif +#ifndef netif_poll_enable +#define netif_poll_enable(x) _kc_netif_poll_enable(x) +static inline void _kc_netif_poll_enable(struct net_device *netdev) +{ + clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); +} +#endif +#endif /* NAPI */ +#ifndef netif_tx_disable +#define netif_tx_disable(x) _kc_netif_tx_disable(x) +static inline void _kc_netif_tx_disable(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + netif_stop_queue(dev); + spin_unlock_bh(&dev->xmit_lock); +} +#endif +#else /* 2.4.23 => 2.4.22 */ +#define HAVE_SCTP +#endif /* 2.4.23 => 2.4.22 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) +#define ETHTOOL_OPS_COMPAT +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.5.71 => 2.4.x */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) +#define sk_protocol protocol +#define pci_get_device pci_find_device +#endif /* 2.5.70 => 2.4.x */ + +/*****************************************************************************/ +/* < 2.4.27 or 2.6.0 <= 2.6.5 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) + +#ifndef netif_msg_init +#define netif_msg_init _kc_netif_msg_init +static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) +{ + /* use default */ + if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) + return default_msg_enable_bits; + if (debug_value == 0) /* no output */ + return 0; + /* set low N bits */ + return (1 << debug_value) -1; +} +#endif + +#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ +/*****************************************************************************/ +#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ + (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ + ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) +#define netdev_priv(x) x->priv +#endif + +/*****************************************************************************/ +/* <= 2.5.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) +#include +#undef pci_register_driver +#define pci_register_driver pci_module_init + +/* + * Most of the dma compat code is copied/modifed from the 2.4.37 + * /include/linux/libata-compat.h header file + */ +/* These definitions mirror those in pci.h, so they can be used + * interchangeably with their PCI_ counterparts */ +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +struct device { + struct pci_dev pdev; +}; + +static inline struct pci_dev *to_pci_dev (struct device *dev) +{ + return (struct pci_dev *) dev; +} +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return (struct device *) pdev; +} + +#define pdev_printk(lvl, pdev, fmt, args...) \ + printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) +#define dev_err(dev, fmt, args...) \ + pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) +#define dev_info(dev, fmt, args...) \ + pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) +#define dev_warn(dev, fmt, args...) \ + pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) + +/* NOTE: dangerous! we ignore the 'gfp' argument */ +#define dma_alloc_coherent(dev,sz,dma,gfp) \ + pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) +#define dma_free_coherent(dev,sz,addr,dma_addr) \ + pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) + +#define dma_map_page(dev,a,b,c,d) \ + pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) +#define dma_unmap_page(dev,a,b,c) \ + pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_single(dev,a,b,c) \ + pci_map_single(to_pci_dev(dev),(a),(b),(c)) +#define dma_unmap_single(dev,a,b,c) \ + pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) + +#define dma_sync_single(dev,a,b,c) \ + pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) + +/* for range just sync everything, that's all the pci API can do */ +#define dma_sync_single_range(dev,addr,off,sz,dir) \ + pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) + +#define dma_set_mask(dev,mask) \ + pci_set_dma_mask(to_pci_dev(dev),(mask)) + +/* hlist_* code - double linked lists */ +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#ifndef might_sleep +#define might_sleep() +#endif +#else +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return &pdev->dev; +} +#endif /* <= 2.5.0 */ + +/*****************************************************************************/ +/* 2.5.28 => 2.4.23 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + +static inline void _kc_synchronize_irq(void) +{ + synchronize_irq(); +} +#undef synchronize_irq +#define synchronize_irq(X) _kc_synchronize_irq() + +#include +#define work_struct tq_struct +#undef INIT_WORK +#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) +#undef container_of +#define container_of list_entry +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks +#define cancel_work_sync(x) flush_scheduled_work() + +#endif /* 2.5.28 => 2.4.17 */ + +/*****************************************************************************/ +/* 2.6.0 => 2.5.28 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#undef get_cpu +#define get_cpu() smp_processor_id() +#undef put_cpu +#define put_cpu() do { } while(0) +#define MODULE_INFO(version, _version) +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT +#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 +#endif +#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 + +#define dma_set_coherent_mask(dev,mask) 1 + +#undef dev_put +#define dev_put(dev) __dev_put(dev) + +#ifndef skb_fill_page_desc +#define skb_fill_page_desc _kc_skb_fill_page_desc +extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); +#endif + +#undef ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +#ifndef page_count +#define page_count(p) atomic_read(&(p)->count) +#endif + +#ifdef MAX_NUMNODES +#undef MAX_NUMNODES +#endif +#define MAX_NUMNODES 1 + +/* find_first_bit and find_next bit are not defined for most + * 2.4 kernels (except for the redhat 2.4.21 kernels + */ +#include +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#undef find_next_bit +#define find_next_bit _kc_find_next_bit +extern unsigned long _kc_find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) + + +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (strchr(dev->name, '%')) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#ifndef strlcpy +#define strlcpy _kc_strlcpy +extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); +#endif /* strlcpy */ + +#endif /* 2.6.0 => 2.5.28 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +#define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.6.5 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) +#define dma_sync_single_for_cpu dma_sync_single +#define dma_sync_single_for_device dma_sync_single +#define dma_sync_single_range_for_cpu dma_sync_single_range +#define dma_sync_single_range_for_device dma_sync_single_range +#ifndef pci_dma_mapping_error +#define pci_dma_mapping_error _kc_pci_dma_mapping_error +static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) +{ + return dma_addr == 0; +} +#endif +#endif /* 2.6.5 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); +#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args) +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) +/* taken from 2.6 include/linux/bitmap.h */ +#undef bitmap_zero +#define bitmap_zero _kc_bitmap_zero +static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} +#define random_ether_addr _kc_random_ether_addr +static inline void _kc_random_ether_addr(u8 *addr) +{ + get_random_bytes(addr, ETH_ALEN); + addr[0] &= 0xfe; /* clear multicast */ + addr[0] |= 0x02; /* set local assignment */ +} +#define page_to_nid(x) 0 + +#endif /* < 2.6.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) +#undef if_mii +#define if_mii _kc_if_mii +static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} + +#ifndef __force +#define __force +#endif +#endif /* < 2.6.7 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +#ifndef PCI_EXP_DEVCTL +#define PCI_EXP_DEVCTL 8 +#endif +#ifndef PCI_EXP_DEVCTL_CERE +#define PCI_EXP_DEVCTL_CERE 0x0001 +#endif +#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((x * HZ)/1000 + 2); \ + } while (0) + +#endif /* < 2.6.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) +#include +#define __iomem + +#ifndef kcalloc +#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) +extern void *_kc_kzalloc(size_t size, int flags); +#endif +#define MSEC_PER_SEC 1000L +static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); +#else + return (j * MSEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return m * (HZ / MSEC_PER_SEC); +#else + return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; +#endif +} + +#define msleep_interruptible _kc_msleep_interruptible +static inline unsigned long _kc_msleep_interruptible(unsigned int msecs) +{ + unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; + + while (timeout && !signal_pending(current)) { + __set_current_state(TASK_INTERRUPTIBLE); + timeout = schedule_timeout(timeout); + } + return _kc_jiffies_to_msecs(timeout); +} + +/* Basic mode control register. */ +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +#ifndef __be16 +#define __be16 u16 +#endif +#ifndef __be32 +#define __be32 u32 +#endif +#ifndef __be64 +#define __be64 u64 +#endif + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb->mac.raw; +} + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ + +#define skb_header_pointer _kc_skb_header_pointer +static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, + int offset, int len, void *buffer) +{ + int hlen = skb_headlen(skb); + + if (hlen - offset >= len) + return skb->data + offset; + +#ifdef MAX_SKB_FRAGS + if (skb_copy_bits(skb, offset, buffer, len) < 0) + return NULL; + + return buffer; +#else + return NULL; +#endif + +#ifndef NETDEV_TX_OK +#define NETDEV_TX_OK 0 +#endif +#ifndef NETDEV_TX_BUSY +#define NETDEV_TX_BUSY 1 +#endif +#ifndef NETDEV_TX_LOCKED +#define NETDEV_TX_LOCKED -1 +#endif +} + +#ifndef __bitwise +#define __bitwise +#endif +#endif /* < 2.6.9 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +#ifdef module_param_array_named +#undef module_param_array_named +#define module_param_array_named(name, array, type, nump, perm) \ + static struct kparam_array __param_arr_##name \ + = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ + sizeof(array[0]), array }; \ + module_param_call(name, param_array_set, param_array_get, \ + &__param_arr_##name, perm) +#endif /* module_param_array_named */ +/* + * num_online is broken for all < 2.6.10 kernels. This is needed to support + * Node module parameter of ixgbe. + */ +#undef num_online_nodes +#define num_online_nodes(n) 1 +extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); +#undef node_online_map +#define node_online_map _kcompat_node_online_map +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) +#define PCI_D0 0 +#define PCI_D1 1 +#define PCI_D2 2 +#define PCI_D3hot 3 +#define PCI_D3cold 4 +typedef int pci_power_t; +#define pci_choose_state(pdev,state) state +#define PMSG_SUSPEND 3 +#define PCI_EXP_LNKCTL 16 + +#undef NETIF_F_LLTX + +#ifndef ARCH_HAS_PREFETCH +#define prefetch(X) +#endif + +#ifndef NET_IP_ALIGN +#define NET_IP_ALIGN 2 +#endif + +#define KC_USEC_PER_SEC 1000000L +#define usecs_to_jiffies _kc_usecs_to_jiffies +static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) +{ +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (KC_USEC_PER_SEC / HZ) * j; +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); +#else + return (j * KC_USEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return m * (HZ / KC_USEC_PER_SEC); +#else + return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; +#endif +} +#endif /* < 2.6.11 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) +#include +#define USE_REBOOT_NOTIFIER + +/* Generic MII registers. */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define MII_STAT1000 0x0a /* 1000BASE-T status */ +/* Advertisement control register. */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ +/* 1000BASE-T Control register */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#ifndef is_zero_ether_addr +#define is_zero_ether_addr _kc_is_zero_ether_addr +static inline int _kc_is_zero_ether_addr(const u8 *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} +#endif /* is_zero_ether_addr */ +#ifndef is_multicast_ether_addr +#define is_multicast_ether_addr _kc_is_multicast_ether_addr +static inline int _kc_is_multicast_ether_addr(const u8 *addr) +{ + return addr[0] & 0x01; +} +#endif /* is_multicast_ether_addr */ +#endif /* < 2.6.12 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +#ifndef kstrdup +#define kstrdup _kc_kstrdup +extern char *_kc_kstrdup(const char *s, unsigned int gfp); +#endif +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +#define pm_message_t u32 +#ifndef kzalloc +#define kzalloc _kc_kzalloc +extern void *_kc_kzalloc(size_t size, int flags); +#endif + +/* Generic MII registers. */ +#define MII_ESTATUS 0x0f /* Extended Status */ +/* Basic mode status register. */ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +/* Extended status register. */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) +#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) +#define gfp_t unsigned +#else +typedef unsigned gfp_t; +#endif +#endif /* !RHEL4.3->RHEL5.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) +#ifdef CONFIG_X86_64 +#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_cpu(dev, dma_handle, size, dir) +#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_device(dev, dma_handle, size, dir) +#endif +#endif +#endif /* < 2.6.14 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) +#ifndef vmalloc_node +#define vmalloc_node(a,b) vmalloc(a) +#endif /* vmalloc_node*/ + +#define setup_timer(_timer, _function, _data) \ +do { \ + (_timer)->function = _function; \ + (_timer)->data = _data; \ + init_timer(_timer); \ +} while (0) +#ifndef device_can_wakeup +#define device_can_wakeup(dev) (1) +#endif +#ifndef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) do{}while(0) +#endif +#ifndef device_init_wakeup +#define device_init_wakeup(dev,val) do {} while (0) +#endif +static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} +#undef compare_ether_addr +#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) +#endif /* < 2.6.15 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) +#undef DEFINE_MUTEX +#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) +#define mutex_lock(x) down_interruptible(x) +#define mutex_unlock(x) up(x) + +#ifndef ____cacheline_internodealigned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp +#else +#define ____cacheline_internodealigned_in_smp +#endif /* CONFIG_SMP */ +#endif /* ____cacheline_internodealigned_in_smp */ +#undef HAVE_PCI_ERS +#else /* 2.6.16 and above */ +#undef HAVE_PCI_ERS +#define HAVE_PCI_ERS +#endif /* < 2.6.16 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) +#ifndef first_online_node +#define first_online_node 0 +#endif +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif +#endif /* < 2.6.17 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) + +#ifndef IRQ_HANDLED +#define irqreturn_t void +#define IRQ_HANDLED +#define IRQ_NONE +#endif + +#ifndef IRQF_PROBE_SHARED +#ifdef SA_PROBEIRQ +#define IRQF_PROBE_SHARED SA_PROBEIRQ +#else +#define IRQF_PROBE_SHARED 0 +#endif +#endif + +#ifndef IRQF_SHARED +#define IRQF_SHARED SA_SHIRQ +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef FIELD_SIZEOF +#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#endif + +#ifndef skb_is_gso +#ifdef NETIF_F_TSO +#define skb_is_gso _kc_skb_is_gso +static inline int _kc_skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_size; +} +#else +#define skb_is_gso(a) 0 +#endif +#endif + +#ifndef resource_size_t +#define resource_size_t unsigned long +#endif + +#ifdef skb_pad +#undef skb_pad +#endif +#define skb_pad(x,y) _kc_skb_pad(x, y) +int _kc_skb_pad(struct sk_buff *skb, int pad); +#ifdef skb_padto +#undef skb_padto +#endif +#define skb_padto(x,y) _kc_skb_padto(x, y) +static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return 0; + return _kc_skb_pad(skb, len - size); +} + +#ifndef DECLARE_PCI_UNMAP_ADDR +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + u32 LEN_NAME +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) +#endif /* DECLARE_PCI_UNMAP_ADDR */ +#endif /* < 2.6.18 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) +#if (!((RHEL_RELEASE_CODE && \ + ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ + RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \ + (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0)))) +typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); +#endif +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#undef CONFIG_INET_LRO +#undef CONFIG_INET_LRO_MODULE +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +#endif +typedef irqreturn_t (*new_handler_t)(int, void*); +static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#else /* 2.4.x */ +typedef void (*irq_handler_t)(int, void*, struct pt_regs *); +typedef void (*new_handler_t)(int, void*); +static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#endif /* >= 2.5.x */ +{ + irq_handler_t new_handler = (irq_handler_t) handler; + return request_irq(irq, new_handler, flags, devname, dev_id); +} + +#undef request_irq +#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) + +#define irq_handler_t new_handler_t +/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +#define PCIE_CONFIG_SPACE_LEN 256 +#define PCI_CONFIG_SPACE_LEN 64 +#define PCIE_LINK_STATUS 0x12 +#define pci_config_space_ich8lan() do {} while(0) +#undef pci_save_state +extern int _kc_pci_save_state(struct pci_dev *); +#define pci_save_state(pdev) _kc_pci_save_state(pdev) +#undef pci_restore_state +extern void _kc_pci_restore_state(struct pci_dev *); +#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +#undef free_netdev +extern void _kc_free_netdev(struct net_device *); +#define free_netdev(netdev) _kc_free_netdev(netdev) +#endif +static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + return 0; +} +#define pci_disable_pcie_error_reporting(dev) do {} while (0) +#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) + +extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); +#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) +#ifndef bool +#define bool _Bool +#define true 1 +#define false 0 +#endif +#else /* 2.6.19 */ +#include +#include +#endif /* < 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) +#undef INIT_WORK +#define INIT_WORK(_work, _func) \ +do { \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->pending = 0; \ + (_work)->func = (void (*)(void *))_func; \ + (_work)->data = _work; \ + init_timer(&(_work)->timer); \ +} while (0) +#endif + +#ifndef PCI_VDEVICE +#define PCI_VDEVICE(ven, dev) \ + PCI_VENDOR_ID_##ven, (dev), \ + PCI_ANY_ID, PCI_ANY_ID, 0, 0 +#endif + +#ifndef round_jiffies +#define round_jiffies(x) x +#endif + +#define csum_offset csum + +#define HAVE_EARLY_VMALLOC_NODE +#define dev_to_node(dev) -1 +#undef set_dev_node +/* remove compiler warning with b=b, for unused variable */ +#define set_dev_node(a, b) do { (b) = (b); } while(0) + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} +#endif + +#else /* < 2.6.20 */ +#define HAVE_DEVICE_NUMA_NODE +#endif /* < 2.6.20 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define NETDEV_CLASS_DEV +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) +#define vlan_group_set_device(vg, id, dev) \ + do { \ + if (vg) vg->vlan_devices[id] = dev; \ + } while (0) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define pci_channel_offline(pdev) (pdev->error_state && \ + pdev->error_state != pci_channel_io_normal) +#define pci_request_selected_regions(pdev, bars, name) \ + pci_request_regions(pdev, name) +#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define tcp_hdr(skb) (skb->h.th) +#define tcp_hdrlen(skb) (skb->h.th->doff << 2) +#define skb_transport_offset(skb) (skb->h.raw - skb->data) +#define skb_transport_header(skb) (skb->h.raw) +#define ipv6_hdr(skb) (skb->nh.ipv6h) +#define ip_hdr(skb) (skb->nh.iph) +#define skb_network_offset(skb) (skb->nh.raw - skb->data) +#define skb_network_header(skb) (skb->nh.raw) +#define skb_tail_pointer(skb) skb->tail +#define skb_reset_tail_pointer(skb) \ + do { \ + skb->tail = skb->data; \ + } while (0) +#define skb_copy_to_linear_data(skb, from, len) \ + memcpy(skb->data, from, len) +#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ + memcpy(skb->data + offset, from, len) +#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) +#define pci_register_driver pci_module_init +#define skb_mac_header(skb) skb->mac.raw + +#ifdef NETIF_F_MULTI_QUEUE +#ifndef alloc_etherdev_mq +#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) +#endif +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef ETH_FCS_LEN +#define ETH_FCS_LEN 4 +#endif +#define cancel_work_sync(x) flush_scheduled_work() +#ifndef udp_hdr +#define udp_hdr _udp_hdr +static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif + +#ifdef cpu_to_be16 +#undef cpu_to_be16 +#endif +#define cpu_to_be16(x) __constant_htons(x) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) +enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, + DUMP_PREFIX_OFFSET +}; +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ +#ifndef hex_asc +#define hex_asc(x) "0123456789abcdef"[x] +#endif +#include +extern void _kc_print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); +#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ + _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) +#else /* 2.6.22 */ +#define ETH_TYPE_TRANS_SETS_DEV +#define HAVE_NETDEV_STATS_IN_NETDEV +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) +#endif /* > 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +#define netif_subqueue_stopped(_a, _b) 0 +#ifndef PTR_ALIGN +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#endif + +#ifndef CONFIG_PM_SLEEP +#define CONFIG_PM_SLEEP CONFIG_PM +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) +#define HAVE_ETHTOOL_GET_PERM_ADDR +#endif /* 2.6.14 through 2.6.22 */ +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifndef ETH_FLAG_LRO +#define ETH_FLAG_LRO NETIF_F_LRO +#endif + +/* if GRO is supported then the napi struct must already exist */ +#ifndef NETIF_F_GRO +/* NAPI API changes in 2.6.24 break everything */ +struct napi_struct { + /* used to look up the real NAPI polling routine */ + int (*poll)(struct napi_struct *, int); + struct net_device *dev; + int weight; +}; +#endif + +#ifdef NAPI +extern int __kc_adapter_clean(struct net_device *, int *); +extern struct net_device *napi_to_poll_dev(struct napi_struct *napi); +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = (_napi); \ + struct net_device *poll_dev = napi_to_poll_dev(__napi); \ + poll_dev->poll = &(__kc_adapter_clean); \ + poll_dev->priv = (_napi); \ + poll_dev->weight = (_weight); \ + set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ + set_bit(__LINK_STATE_START, &poll_dev->state);\ + dev_hold(poll_dev); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_napi) \ + do { \ + struct net_device *poll_dev = napi_to_poll_dev(_napi); \ + WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ + dev_put(poll_dev); \ + memset(poll_dev, 0, sizeof(struct net_device));\ + } while (0) +#define napi_schedule_prep(_napi) \ + (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) +#define napi_schedule(_napi) \ + do { \ + if (napi_schedule_prep(_napi)) \ + __netif_rx_schedule(napi_to_poll_dev(_napi)); \ + } while (0) +#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) +#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) +#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) +#ifndef NETIF_F_GRO +#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) +#else +#define napi_complete(_napi) \ + do { \ + napi_gro_flush(_napi); \ + netif_rx_complete(napi_to_poll_dev(_napi)); \ + } while (0) +#endif /* NETIF_F_GRO */ +#else /* NAPI */ +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = _napi; \ + _netdev->poll = &(_poll); \ + _netdev->weight = (_weight); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_a) do {} while (0) +#endif /* NAPI */ + +#undef dev_get_by_name +#define dev_get_by_name(_a, _b) dev_get_by_name(_b) +#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) +#ifndef DMA_BIT_MASK +#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) +#endif + +#ifdef NETIF_F_TSO6 +#define skb_is_gso_v6 _kc_skb_is_gso_v6 +static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; +} +#endif /* NETIF_F_TSO6 */ + +#ifndef KERN_CONT +#define KERN_CONT "" +#endif +#else /* < 2.6.24 */ +#define HAVE_ETHTOOL_GET_SSET_COUNT +#define HAVE_NETDEV_NAPI_LIST +#endif /* < 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#include +#else /* >= 3.2.0 */ +#include +#endif /* else >= 3.2.0 */ +#endif /* > 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) +#define PM_QOS_CPU_DMA_LATENCY 1 + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) +#include +#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY +#define pm_qos_add_requirement(pm_qos_class, name, value) \ + set_acceptable_latency(name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) \ + remove_acceptable_latency(name) +#define pm_qos_update_requirement(pm_qos_class, name, value) \ + modify_acceptable_latency(name, value) +#else +#define PM_QOS_DEFAULT_VALUE -1 +#define pm_qos_add_requirement(pm_qos_class, name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) +#define pm_qos_update_requirement(pm_qos_class, name, value) { \ + if (value != PM_QOS_DEFAULT_VALUE) { \ + printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ + pci_name(adapter->pdev)); \ + } \ +} + +#endif /* > 2.6.18 */ + +#define pci_enable_device_mem(pdev) pci_enable_device(pdev) + +#ifndef DEFINE_PCI_DEVICE_TABLE +#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] +#endif /* DEFINE_PCI_DEVICE_TABLE */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#ifndef IXGBE_PROCFS +#define IXGBE_PROCFS +#endif /* IXGBE_PROCFS */ +#endif /* >= 2.6.0 */ + + +#else /* < 2.6.25 */ + +#ifndef IXGBE_SYSFS +#define IXGBE_SYSFS +#endif /* IXGBE_SYSFS */ + + +#endif /* < 2.6.25 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +#ifndef clamp_t +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min : __val; \ + __val > __max ? __max : __val; }) +#endif /* clamp_t */ +#ifdef NETIF_F_TSO +#ifdef NETIF_F_TSO6 +#define netif_set_gso_max_size(_netdev, size) \ + do { \ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \ + _netdev->features &= ~NETIF_F_TSO; \ + _netdev->features &= ~NETIF_F_TSO6; \ + } else { \ + _netdev->features |= NETIF_F_TSO; \ + _netdev->features |= NETIF_F_TSO6; \ + } \ + } while (0) +#else /* NETIF_F_TSO6 */ +#define netif_set_gso_max_size(_netdev, size) \ + do { \ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + _netdev->features &= ~NETIF_F_TSO; \ + else \ + _netdev->features |= NETIF_F_TSO; \ + } while (0) +#endif /* NETIF_F_TSO6 */ +#else +#define netif_set_gso_max_size(_netdev, size) do {} while (0) +#endif /* NETIF_F_TSO */ +#undef kzalloc_node +#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) + +extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); +#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) +#else /* < 2.6.26 */ +#include +#define HAVE_NETDEV_VLAN_FEATURES +#endif /* < 2.6.26 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + ep->speed = (__u16)speed; + /* ep->speed_hi = (__u16)(speed >> 16); */ +} +#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set + +static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + /* no speed_hi before 2.6.27, and probably no need for it yet */ + return (__u32)ep->speed; +} +#define ethtool_cmd_speed _kc_ethtool_cmd_speed + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) +#define ANCIENT_PM 1 +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ + defined(CONFIG_PM_SLEEP)) +#define NEWER_PM 1 +#endif +#if defined(ANCIENT_PM) || defined(NEWER_PM) +#undef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) \ + do { \ + u16 pmc = 0; \ + int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ + if (pm) { \ + pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ + &pmc); \ + } \ + (dev)->power.can_wakeup = !!(pmc >> 11); \ + (dev)->power.should_wakeup = (val && (pmc >> 11)); \ + } while (0) +#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ +#endif /* 2.6.15 through 2.6.27 */ +#ifndef netif_napi_del +#define netif_napi_del(_a) do {} while (0) +#ifdef NAPI +#ifdef CONFIG_NETPOLL +#undef netif_napi_del +#define netif_napi_del(_a) list_del(&(_a)->dev_list); +#endif +#endif +#endif /* netif_napi_del */ +#ifdef dma_mapping_error +#undef dma_mapping_error +#endif +#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#define HAVE_TX_MQ +#endif + +#ifdef HAVE_TX_MQ +extern void _kc_netif_tx_stop_all_queues(struct net_device *); +extern void _kc_netif_tx_wake_all_queues(struct net_device *); +extern void _kc_netif_tx_start_all_queues(struct net_device *); +#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) +#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) +#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) +#undef netif_stop_subqueue +#define netif_stop_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_stop_subqueue((_ndev), (_qi)); \ + else \ + netif_stop_queue((_ndev)); \ + } while (0) +#undef netif_start_subqueue +#define netif_start_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_start_subqueue((_ndev), (_qi)); \ + else \ + netif_start_queue((_ndev)); \ + } while (0) +#else /* HAVE_TX_MQ */ +#define netif_tx_stop_all_queues(a) netif_stop_queue(a) +#define netif_tx_wake_all_queues(a) netif_wake_queue(a) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) +#define netif_tx_start_all_queues(a) netif_start_queue(a) +#else +#define netif_tx_start_all_queues(a) do {} while (0) +#endif +#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev)) +#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) +#endif /* HAVE_TX_MQ */ +#ifndef NETIF_F_MULTI_QUEUE +#define NETIF_F_MULTI_QUEUE 0 +#define netif_is_multiqueue(a) 0 +#define netif_wake_subqueue(a, b) +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef __WARN_printf +extern void __kc_warn_slowpath(const char *file, const int line, + const char *fmt, ...) __attribute__((format(printf, 3, 4))); +#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) +#endif /* __WARN_printf */ + +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) +#endif /* WARN */ +#else /* < 2.6.27 */ +#define HAVE_TX_MQ +#define HAVE_NETDEV_SELECT_QUEUE +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) +#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ + pci_resource_len(pdev, bar)) +#define pci_wake_from_d3 _kc_pci_wake_from_d3 +#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep +extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); +extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); +#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) +#ifndef __skb_queue_head_init +static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} +#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) +#endif +#endif /* < 2.6.28 */ + +#ifndef skb_add_rx_frag +#define skb_add_rx_frag _kc_skb_add_rx_frag +extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int); +#endif + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +#ifndef swap +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#endif +#define pci_request_selected_regions_exclusive(pdev, bars, name) \ + pci_request_selected_regions(pdev, bars, name) +#ifndef CONFIG_NR_CPUS +#define CONFIG_NR_CPUS 1 +#endif /* CONFIG_NR_CPUS */ +#ifndef pcie_aspm_enabled +#define pcie_aspm_enabled() (1) +#endif /* pcie_aspm_enabled */ +#else /* < 2.6.29 */ +#ifndef HAVE_NET_DEVICE_OPS +#define HAVE_NET_DEVICE_OPS +#endif +#ifdef CONFIG_DCB +#define HAVE_PFC_MODE_ENABLE +#endif /* CONFIG_DCB */ +#endif /* < 2.6.29 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#define skb_rx_queue_recorded(a) false +#define skb_get_rx_queue(a) 0 +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb); +#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s) +#define skb_record_rx_queue(a, b) do {} while (0) +#ifndef CONFIG_PCI_IOV +#undef pci_enable_sriov +#define pci_enable_sriov(a, b) -ENOTSUPP +#undef pci_disable_sriov +#define pci_disable_sriov(a) do {} while (0) +#endif /* CONFIG_PCI_IOV */ +#ifndef pr_cont +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) +#endif /* pr_cont */ +#else +#define HAVE_ASPM_QUIRKS +#endif /* < 2.6.30 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) +#define ETH_P_1588 0x88F7 +#define ETH_P_FIP 0x8914 +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc_count) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(uclist, dev) \ + for (uclist = dev->uc_list; uclist; uclist = uclist->next) +#endif +#else +#ifndef HAVE_NETDEV_STORAGE_ADDRESS +#define HAVE_NETDEV_STORAGE_ADDRESS +#endif +#ifndef HAVE_NETDEV_HW_ADDR +#define HAVE_NETDEV_HW_ADDR +#endif +#ifndef HAVE_TRANS_START_IN_QUEUE +#define HAVE_TRANS_START_IN_QUEUE +#endif +#endif /* < 2.6.31 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) +#undef netdev_tx_t +#define netdev_tx_t int +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef NETIF_F_FCOE_MTU +#define NETIF_F_FCOE_MTU (1 << 26) +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ + +#ifndef pm_runtime_get_sync +#define pm_runtime_get_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_put +#define pm_runtime_put(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_sync +#define pm_runtime_put_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_resume +#define pm_runtime_resume(dev) do {} while (0) +#endif +#ifndef pm_schedule_suspend +#define pm_schedule_suspend(dev, t) do {} while (0) +#endif +#ifndef pm_runtime_set_suspended +#define pm_runtime_set_suspended(dev) do {} while (0) +#endif +#ifndef pm_runtime_disable +#define pm_runtime_disable(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_noidle +#define pm_runtime_put_noidle(dev) do {} while (0) +#endif +#ifndef pm_runtime_set_active +#define pm_runtime_set_active(dev) do {} while (0) +#endif +#ifndef pm_runtime_enable +#define pm_runtime_enable(dev) do {} while (0) +#endif +#ifndef pm_runtime_get_noresume +#define pm_runtime_get_noresume(dev) do {} while (0) +#endif +#else /* < 2.6.32 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE +#define HAVE_NETDEV_OPS_FCOE_ENABLE +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_OPS_GETAPP +#define HAVE_DCBNL_OPS_GETAPP +#endif +#endif /* CONFIG_DCB */ +#include +/* IOV bad DMA target work arounds require at least this kernel rev support */ +#define HAVE_PCIE_TYPE +#endif /* < 2.6.32 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) +#ifndef pci_pcie_cap +#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) +#endif +#ifndef IPV4_FLOW +#define IPV4_FLOW 0x10 +#endif /* IPV4_FLOW */ +#ifndef IPV6_FLOW +#define IPV6_FLOW 0x11 +#endif /* IPV6_FLOW */ +/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */ +#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* RHEL6 or SLES11 SP1 */ +#ifndef __percpu +#define __percpu +#endif /* __percpu */ +#else /* < 2.6.33 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#define HAVE_ETHTOOL_SFP_DISPLAY_PORT +#endif /* < 2.6.33 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#ifndef ETH_FLAG_NTUPLE +#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE +#endif + +#ifndef netdev_mc_count +#define netdev_mc_count(dev) ((dev)->mc_count) +#endif +#ifndef netdev_mc_empty +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) +#endif +#ifndef netdev_for_each_mc_addr +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) +#endif +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc.count) +#endif +#ifndef netdev_uc_empty +#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) +#endif +#ifndef dma_set_coherent_mask +#define dma_set_coherent_mask(dev,mask) \ + pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) +#endif +#ifndef pci_dev_run_wake +#define pci_dev_run_wake(pdev) (0) +#endif + +/* netdev logging taken from include/linux/netdevice.h */ +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (dev->reg_state != NETREG_REGISTERED) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#undef netdev_printk +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct adapter_struct *kc_adapter = netdev_priv(netdev);\ + struct pci_dev *pdev = kc_adapter->pdev; \ + printk("%s %s: " format, level, pci_name(pdev), \ + ##args); \ +} while(0) +#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct adapter_struct *kc_adapter = netdev_priv(netdev);\ + struct pci_dev *pdev = kc_adapter->pdev; \ + struct device *dev = pci_dev_to_dev(pdev); \ + dev_printk(level, dev, "%s: " format, \ + netdev_name(netdev), ##args); \ +} while(0) +#else /* 2.6.21 => 2.6.34 */ +#define netdev_printk(level, netdev, format, args...) \ + dev_printk(level, (netdev)->dev.parent, \ + "%s: " format, \ + netdev_name(netdev), ##args) +#endif /* <2.6.0 <2.6.21 <2.6.34 */ +#undef netdev_emerg +#define netdev_emerg(dev, format, args...) \ + netdev_printk(KERN_EMERG, dev, format, ##args) +#undef netdev_alert +#define netdev_alert(dev, format, args...) \ + netdev_printk(KERN_ALERT, dev, format, ##args) +#undef netdev_crit +#define netdev_crit(dev, format, args...) \ + netdev_printk(KERN_CRIT, dev, format, ##args) +#undef netdev_err +#define netdev_err(dev, format, args...) \ + netdev_printk(KERN_ERR, dev, format, ##args) +#undef netdev_warn +#define netdev_warn(dev, format, args...) \ + netdev_printk(KERN_WARNING, dev, format, ##args) +#undef netdev_notice +#define netdev_notice(dev, format, args...) \ + netdev_printk(KERN_NOTICE, dev, format, ##args) +#undef netdev_info +#define netdev_info(dev, format, args...) \ + netdev_printk(KERN_INFO, dev, format, ##args) +#undef netdev_dbg +#if defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#elif defined(CONFIG_DYNAMIC_DEBUG) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ + netdev_name(__dev), ##args); \ +} while (0) +#else /* DEBUG */ +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ + 0; \ +}) +#endif /* DEBUG */ + +#undef netif_printk +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#undef netif_emerg +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#undef netif_alert +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#undef netif_crit +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#undef netif_err +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#undef netif_warn +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#undef netif_notice +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#undef netif_info +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) + +#ifdef SET_SYSTEM_SLEEP_PM_OPS +#define HAVE_SYSTEM_SLEEP_PM_OPS +#endif + +#ifndef for_each_set_bit +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) +#endif /* for_each_set_bit */ + +#ifndef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN +#define dma_unmap_addr pci_unmap_addr +#define dma_unmap_addr_set pci_unmap_addr_set +#define dma_unmap_len pci_unmap_len +#define dma_unmap_len_set pci_unmap_len_set +#endif /* DEFINE_DMA_UNMAP_ADDR */ +#else /* < 2.6.34 */ +#define HAVE_SYSTEM_SLEEP_PM_OPS +#ifndef HAVE_SET_RX_MODE +#define HAVE_SET_RX_MODE +#endif + +#endif /* < 2.6.34 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifndef numa_node_id +#define numa_node_id() 0 +#endif +#ifdef HAVE_TX_MQ +#include +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); +#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues +#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#define netif_set_real_num_tx_queues(_netdev, _count) \ + do { \ + (_netdev)->egress_subqueue_count = _count; \ + } while (0) +#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#else +#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) +#endif /* HAVE_TX_MQ */ +#ifndef ETH_FLAG_RXHASH +#define ETH_FLAG_RXHASH (1<<28) +#endif /* ETH_FLAG_RXHASH */ +#else /* < 2.6.35 */ +#define HAVE_PM_QOS_REQUEST_LIST +#define HAVE_IRQ_AFFINITY_HINT +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); +#define ethtool_op_set_flags _kc_ethtool_op_set_flags +extern u32 _kc_ethtool_op_get_flags(struct net_device *); +#define ethtool_op_get_flags _kc_ethtool_op_get_flags + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#ifdef NET_IP_ALIGN +#undef NET_IP_ALIGN +#endif +#define NET_IP_ALIGN 0 +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#ifdef NET_SKB_PAD +#undef NET_SKB_PAD +#endif + +#if (L1_CACHE_BYTES > 32) +#define NET_SKB_PAD L1_CACHE_BYTES +#else +#define NET_SKB_PAD 32 +#endif + +static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); + if (skb) { +#if (NET_IP_ALIGN + NET_SKB_PAD) + skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); +#endif + skb->dev = dev; + } + return skb; +} + +#ifdef netdev_alloc_skb_ip_align +#undef netdev_alloc_skb_ip_align +#endif +#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) + +#undef netif_level +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#undef usleep_range +#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) + +#else /* < 2.6.36 */ +#define HAVE_PM_QOS_REQUEST_ACTIVE +#define HAVE_8021P_SUPPORT +#define HAVE_NDO_GET_STATS64 +#endif /* < 2.6.36 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) +#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) +#endif +#ifndef VLAN_N_VID +#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN +#endif /* VLAN_N_VID */ +#ifndef ETH_FLAG_TXVLAN +#define ETH_FLAG_TXVLAN (1 << 7) +#endif /* ETH_FLAG_TXVLAN */ +#ifndef ETH_FLAG_RXVLAN +#define ETH_FLAG_RXVLAN (1 << 8) +#endif /* ETH_FLAG_RXVLAN */ + +static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) +{ + WARN_ON(skb->ip_summed != CHECKSUM_NONE); +} +#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) + +static inline void *_kc_vzalloc_node(unsigned long size, int node) +{ + void *addr = vmalloc_node(size, node); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) + +static inline void *_kc_vzalloc(unsigned long size) +{ + void *addr = vmalloc(size); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc(_size) _kc_vzalloc(_size) + +#ifndef vlan_get_protocol +static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb) || + skb->protocol != cpu_to_be16(ETH_P_8021Q)) + return skb->protocol; + + if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) + return 0; + + return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; +} +#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) +#endif +#ifdef HAVE_HW_TIME_STAMP +#define SKBTX_HW_TSTAMP (1 << 0) +#define SKBTX_IN_PROGRESS (1 << 2) +#define SKB_SHARED_TX_IS_UNION +#endif +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) +#ifndef HAVE_VLAN_RX_REGISTER +#define HAVE_VLAN_RX_REGISTER +#endif +#endif /* > 2.4.18 */ +#endif /* < 2.6.37 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define skb_checksum_start_offset(skb) skb_transport_offset(skb) +#else /* 2.6.22 -> 2.6.37 */ +static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} +#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) +#endif /* 2.6.22 -> 2.6.37 */ +#ifdef CONFIG_DCB +#ifndef IEEE_8021QAZ_MAX_TCS +#define IEEE_8021QAZ_MAX_TCS 8 +#endif +#ifndef DCB_CAP_DCBX_HOST +#define DCB_CAP_DCBX_HOST 0x01 +#endif +#ifndef DCB_CAP_DCBX_LLD_MANAGED +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#endif +#ifndef DCB_CAP_DCBX_VER_CEE +#define DCB_CAP_DCBX_VER_CEE 0x04 +#endif +#ifndef DCB_CAP_DCBX_VER_IEEE +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#endif +#ifndef DCB_CAP_DCBX_STATIC +#define DCB_CAP_DCBX_STATIC 0x10 +#endif +#endif /* CONFIG_DCB */ +#else /* < 2.6.38 */ +#endif /* < 2.6.38 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#ifndef skb_queue_reverse_walk_safe +#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->prev, tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) +#endif +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) +extern u8 _kc_netdev_get_num_tc(struct net_device *dev); +#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev) +extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up); +#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up) +#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0) +#else /* RHEL6.1 or greater */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif /* HAVE_MQPRIO */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#ifndef IEEE_8021QAZ_TSA_STRICT +#define IEEE_8021QAZ_TSA_STRICT 0 +#endif +#ifndef IEEE_8021QAZ_TSA_ETS +#define IEEE_8021QAZ_TSA_ETS 2 +#endif +#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE +#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1 +#endif +#endif +#endif /* CONFIG_DCB */ +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#else /* < 2.6.39 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif +#ifndef HAVE_SETUP_TC +#define HAVE_SETUP_TC +#endif +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#endif +#endif /* CONFIG_DCB */ +#ifndef HAVE_NDO_SET_FEATURES +#define HAVE_NDO_SET_FEATURES +#endif +#endif /* < 2.6.39 */ + +/*****************************************************************************/ +/* use < 2.6.40 because of a Fedora 15 kernel update where they + * updated the kernel version to 2.6.40.x and they back-ported 3.0 features + * like set_phys_id for ethtool. + */ +#undef ETHTOOL_GRXRINGS +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) +#ifdef ETHTOOL_GRXRINGS +#ifndef FLOW_EXT +#define FLOW_EXT 0x80000000 +union _kc_ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + __u8 hdata[60]; +}; +struct _kc_ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; +struct _kc_ethtool_rx_flow_spec { + __u32 flow_type; + union _kc_ethtool_flow_union h_u; + struct _kc_ethtool_flow_ext h_ext; + union _kc_ethtool_flow_union m_u; + struct _kc_ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; +#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec +#endif /* FLOW_EXT */ +#endif + +#define pci_disable_link_state_locked pci_disable_link_state + +#ifndef PCI_LTR_VALUE_MASK +#define PCI_LTR_VALUE_MASK 0x000003ff +#endif +#ifndef PCI_LTR_SCALE_MASK +#define PCI_LTR_SCALE_MASK 0x00001c00 +#endif +#ifndef PCI_LTR_SCALE_SHIFT +#define PCI_LTR_SCALE_SHIFT 10 +#endif + +#else /* < 2.6.40 */ +#define HAVE_ETHTOOL_SET_PHYS_ID +#endif /* < 2.6.40 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#ifndef __netdev_alloc_skb_ip_align +#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) +#endif /* __netdev_alloc_skb_ip_align */ +#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) +#define dcb_ieee_delapp(dev, app) 0 +#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) +#else /* < 3.1.0 */ +#ifndef HAVE_DCBNL_IEEE_DELAPP +#define HAVE_DCBNL_IEEE_DELAPP +#endif +#endif /* < 3.1.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#ifdef ETHTOOL_GRXRINGS +#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS +#endif /* ETHTOOL_GRXRINGS */ + +#ifndef skb_frag_size +#define skb_frag_size(frag) _kc_skb_frag_size(frag) +static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} +#endif /* skb_frag_size */ + +#ifndef skb_frag_size_sub +#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) +static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} +#endif /* skb_frag_size_sub */ + +#ifndef skb_frag_page +#define skb_frag_page(frag) _kc_skb_frag_page(frag) +static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) +{ + return frag->page; +} +#endif /* skb_frag_page */ + +#ifndef skb_frag_address +#define skb_frag_address(frag) _kc_skb_frag_address(frag) +static inline void *_kc_skb_frag_address(const skb_frag_t *frag) +{ + return page_address(skb_frag_page(frag)) + frag->page_offset; +} +#endif /* skb_frag_address */ + +#ifndef skb_frag_dma_map +#define skb_frag_dma_map(dev,frag,offset,size,dir) \ + _kc_skb_frag_dma_map(dev,frag,offset,size,dir) +static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + return dma_map_page(dev, skb_frag_page(frag), + frag->page_offset + offset, size, dir); +} +#endif /* skb_frag_dma_map */ + +#ifndef __skb_frag_unref +#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) +static inline void __kc_skb_frag_unref(skb_frag_t *frag) +{ + put_page(skb_frag_page(frag)); +} +#endif /* __skb_frag_unref */ +#else /* < 3.2.0 */ +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_VF_SPOOFCHK_CONFIGURE +#endif +#endif /* < 3.2.0 */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#undef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) +#endif + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) +typedef u32 kni_netdev_features_t; +#else /* ! < 3.3.0 */ +typedef netdev_features_t kni_netdev_features_t; +#define HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef ETHTOOL_SRXNTUPLE +#undef ETHTOOL_SRXNTUPLE +#endif +#endif /* < 3.3.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +#ifndef NETIF_F_RXFCS +#define NETIF_F_RXFCS 0 +#endif /* NETIF_F_RXFCS */ +#ifndef NETIF_F_RXALL +#define NETIF_F_RXALL 0 +#endif /* NETIF_F_RXALL */ + +#define NUMTCS_RETURNS_U8 + + +#endif /* < 3.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) +static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) +{ + return !compare_ether_addr(addr1, addr2); +} +#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) +#else +#define HAVE_FDB_OPS +#endif /* < 3.5.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) ) +#define NETIF_F_HW_VLAN_TX NETIF_F_HW_VLAN_CTAG_TX +#define NETIF_F_HW_VLAN_RX NETIF_F_HW_VLAN_CTAG_RX +#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER +#endif /* >= 3.10.0 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +extern int __kc_pci_vfs_assigned(struct pci_dev *dev); +#else +static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + return 0; +} +#endif +#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) + +#endif + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) +#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) +#endif /* >= 3.16.0 */ + +#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h new file mode 100644 index 00000000..a0e5cb6b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -0,0 +1,149 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#ifndef _KNI_DEV_H_ +#define _KNI_DEV_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef RTE_KNI_VHOST +#include +#endif + +#include +#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ + +/** + * A structure describing the private information for a kni device. + */ + +struct kni_dev { + /* kni list */ + struct list_head list; + + struct net_device_stats stats; + int status; + uint16_t group_id; /* Group ID of a group of KNI devices */ + unsigned core_id; /* Core ID to bind */ + char name[RTE_KNI_NAMESIZE]; /* Network device name */ + struct task_struct *pthread; + + /* wait queue for req/resp */ + wait_queue_head_t wq; + struct mutex sync_lock; + + /* PCI device id */ + uint16_t device_id; + + /* kni device */ + struct net_device *net_dev; + struct net_device *lad_dev; + struct pci_dev *pci_dev; + + /* queue for packets to be sent out */ + void *tx_q; + + /* queue for the packets received */ + void *rx_q; + + /* queue for the allocated mbufs those can be used to save sk buffs */ + void *alloc_q; + + /* free queue for the mbufs to be freed */ + void *free_q; + + /* request queue */ + void *req_q; + + /* response queue */ + void *resp_q; + + void * sync_kva; + void *sync_va; + + void *mbuf_kva; + void *mbuf_va; + + /* mbuf size */ + unsigned mbuf_size; + + /* synchro for request processing */ + unsigned long synchro; + +#ifdef RTE_KNI_VHOST + struct kni_vhost_queue* vhost_queue; + volatile enum { + BE_STOP = 0x1, + BE_START = 0x2, + BE_FINISH = 0x4, + }vq_status; +#endif +}; + +#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args) +#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args) +#ifdef RTE_KNI_KO_DEBUG + #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args) +#else + #define KNI_DBG(args...) +#endif + +#ifdef RTE_KNI_VHOST +unsigned int +kni_poll(struct file *file, struct socket *sock, poll_table * wait); +int kni_chk_vhost_rx(struct kni_dev *kni); +int kni_vhost_init(struct kni_dev *kni); +int kni_vhost_backend_release(struct kni_dev *kni); + +struct kni_vhost_queue { + struct sock sk; + struct socket *sock; + int vnet_hdr_sz; + struct kni_dev *kni; + int sockfd; + unsigned int flags; + struct sk_buff* cache; + struct rte_kni_fifo* fifo; +}; + +#endif + +#ifdef RTE_KNI_VHOST_DEBUG_RX + #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args) +#else + #define KNI_DBG_RX(args...) +#endif + +#ifdef RTE_KNI_VHOST_DEBUG_TX + #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args) +#else + #define KNI_DBG_TX(args...) +#endif + +#endif diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c new file mode 100644 index 00000000..06b6d463 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c @@ -0,0 +1,217 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include "kni_dev.h" + +static int +kni_check_if_running(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + if (priv->lad_dev) + return 0; + else + return -EOPNOTSUPP; +} + +static void +kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); +} + +static int +kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); +} + +static int +kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); +} + +static void +kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); +} + +static int +kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); +} + +static int +kni_nway_reset(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); +} + +static int +kni_get_eeprom_len(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); +} + +static int +kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, + u8 *bytes) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, + bytes); +} + +static int +kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, + u8 *bytes) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, + bytes); +} + +static void +kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); +} + +static int +kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); +} + +static void +kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); +} + +static int +kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, + pause); +} + +static u32 +kni_get_msglevel(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); +} + +static void +kni_set_msglevel(struct net_device *dev, u32 data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); +} + +static int +kni_get_regs_len(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); +} + +static void +kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); +} + +static void +kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, + data); +} + +static int +kni_get_sset_count(struct net_device *dev, int sset) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); +} + +static void +kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, + u64 *data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, + data); +} + +struct ethtool_ops kni_ethtool_ops = { + .begin = kni_check_if_running, + .get_drvinfo = kni_get_drvinfo, + .get_settings = kni_get_settings, + .set_settings = kni_set_settings, + .get_regs_len = kni_get_regs_len, + .get_regs = kni_get_regs, + .get_wol = kni_get_wol, + .set_wol = kni_set_wol, + .nway_reset = kni_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = kni_get_eeprom_len, + .get_eeprom = kni_get_eeprom, + .set_eeprom = kni_set_eeprom, + .get_ringparam = kni_get_ringparam, + .set_ringparam = kni_set_ringparam, + .get_pauseparam = kni_get_pauseparam, + .set_pauseparam = kni_set_pauseparam, + .get_msglevel = kni_get_msglevel, + .set_msglevel = kni_set_msglevel, + .get_strings = kni_get_strings, + .get_sset_count = kni_get_sset_count, + .get_ethtool_stats = kni_get_ethtool_stats, +}; + +void +kni_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &kni_ethtool_ops; +} diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h new file mode 100644 index 00000000..3ea750e2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -0,0 +1,108 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#ifndef _KNI_FIFO_H_ +#define _KNI_FIFO_H_ + +#include + +/** + * Adds num elements into the fifo. Return the number actually written + */ +static inline unsigned +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned fifo_write = fifo->write; + unsigned fifo_read = fifo->read; + unsigned new_write = fifo_write; + + for (i = 0; i < num; i++) { + new_write = (new_write + 1) & (fifo->len - 1); + + if (new_write == fifo_read) + break; + fifo->buffer[fifo_write] = data[i]; + fifo_write = new_write; + } + fifo->write = fifo_write; + + return i; +} + +/** + * Get up to num elements from the fifo. Return the number actully read + */ +static inline unsigned +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned new_read = fifo->read; + unsigned fifo_write = fifo->write; + + for (i = 0; i < num; i++) { + if (new_read == fifo_write) + break; + + data[i] = fifo->buffer[new_read]; + new_read = (new_read + 1) & (fifo->len - 1); + } + fifo->read = new_read; + + return i; +} + +/** + * Get the num of elements in the fifo + */ +static inline unsigned +kni_fifo_count(struct rte_kni_fifo *fifo) +{ + return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1); +} + +/** + * Get the num of available elements in the fifo + */ +static inline unsigned +kni_fifo_free_count(struct rte_kni_fifo *fifo) +{ + return (fifo->read - fifo->write - 1) & (fifo->len - 1); +} + +#ifdef RTE_KNI_VHOST +/** + * Initializes the kni fifo structure + */ +static inline void +kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +{ + fifo->write = 0; + fifo->read = 0; + fifo->len = size; + fifo->elem_size = sizeof(void *); +} +#endif + +#endif /* _KNI_FIFO_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c new file mode 100644 index 00000000..ae8133f3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -0,0 +1,688 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "kni_dev.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kernel Module for managing kni devices"); + +#define KNI_RX_LOOP_NUM 1000 + +#define KNI_MAX_DEVICES 32 + +extern void kni_net_rx(struct kni_dev *kni); +extern void kni_net_init(struct net_device *dev); +extern void kni_net_config_lo_mode(char *lo_str); +extern void kni_net_poll_resp(struct kni_dev *kni); +extern void kni_set_ethtool_ops(struct net_device *netdev); + +extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +extern void ixgbe_kni_remove(struct pci_dev *pdev); +extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +extern void igb_kni_remove(struct pci_dev *pdev); + +static int kni_open(struct inode *inode, struct file *file); +static int kni_release(struct inode *inode, struct file *file); +static int kni_ioctl(struct inode *inode, unsigned int ioctl_num, + unsigned long ioctl_param); +static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num, + unsigned long ioctl_param); +static int kni_dev_remove(struct kni_dev *dev); + +static int __init kni_parse_kthread_mode(void); + +/* KNI processing for single kernel thread mode */ +static int kni_thread_single(void *unused); +/* KNI processing for multiple kernel thread mode */ +static int kni_thread_multiple(void *param); + +static struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, + .unlocked_ioctl = (void *)kni_ioctl, + .compat_ioctl = (void *)kni_compat_ioctl, +}; + +static struct miscdevice kni_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = KNI_DEVICE, + .fops = &kni_fops, +}; + +/* loopback mode */ +static char *lo_mode = NULL; + +/* Kernel thread mode */ +static char *kthread_mode = NULL; +static unsigned multiple_kthread_on = 0; + +#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ + +static int kni_net_id; + +struct kni_net { + unsigned long device_in_use; /* device in use flag */ + struct task_struct *kni_kthread; + struct rw_semaphore kni_list_lock; + struct list_head kni_list_head; +}; + +static int __net_init kni_init_net(struct net *net) +{ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + struct kni_net *knet = net_generic(net, kni_net_id); +#else + struct kni_net *knet; + int ret; + + knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL); + if (!knet) { + ret = -ENOMEM; + return ret; + } +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + + /* Clear the bit of device in use */ + clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + + init_rwsem(&knet->kni_list_lock); + INIT_LIST_HEAD(&knet->kni_list_head); + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + return 0; +#else + ret = net_assign_generic(net, kni_net_id, knet); + if (ret < 0) + kfree(knet); + + return ret; +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +} + +static void __net_exit kni_exit_net(struct net *net) +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32) + struct kni_net *knet = net_generic(net, kni_net_id); + + kfree(knet); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +} + +static struct pernet_operations kni_net_ops = { + .init = kni_init_net, + .exit = kni_exit_net, +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + .id = &kni_net_id, + .size = sizeof(struct kni_net), +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +}; + +static int __init +kni_init(void) +{ + int rc; + + KNI_PRINT("######## DPDK kni module loading ########\n"); + + if (kni_parse_kthread_mode() < 0) { + KNI_ERR("Invalid parameter for kthread_mode\n"); + return -EINVAL; + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + rc = register_pernet_subsys(&kni_net_ops); +#else + rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + if (rc) + return -EPERM; + + rc = misc_register(&kni_misc); + if (rc != 0) { + KNI_ERR("Misc registration failed\n"); + goto out; + } + + /* Configure the lo mode according to the input parameter */ + kni_net_config_lo_mode(lo_mode); + + KNI_PRINT("######## DPDK kni module loaded ########\n"); + + return 0; + +out: +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + unregister_pernet_subsys(&kni_net_ops); +#else + register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + return rc; +} + +static void __exit +kni_exit(void) +{ + misc_deregister(&kni_misc); +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + unregister_pernet_subsys(&kni_net_ops); +#else + register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + KNI_PRINT("####### DPDK kni module unloaded #######\n"); +} + +static int __init +kni_parse_kthread_mode(void) +{ + if (!kthread_mode) + return 0; + + if (strcmp(kthread_mode, "single") == 0) + return 0; + else if (strcmp(kthread_mode, "multiple") == 0) + multiple_kthread_on = 1; + else + return -1; + + return 0; +} + +static int +kni_open(struct inode *inode, struct file *file) +{ + struct net *net = current->nsproxy->net_ns; + struct kni_net *knet = net_generic(net, kni_net_id); + + /* kni device can be opened by one user only per netns */ + if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) + return -EBUSY; + + /* Create kernel thread for single mode */ + if (multiple_kthread_on == 0) { + KNI_PRINT("Single kernel thread for all KNI devices\n"); + /* Create kernel thread for RX */ + knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet, + "kni_single"); + if (IS_ERR(knet->kni_kthread)) { + KNI_ERR("Unable to create kernel threaed\n"); + return PTR_ERR(knet->kni_kthread); + } + } else + KNI_PRINT("Multiple kernel thread mode enabled\n"); + + file->private_data = get_net(net); + KNI_PRINT("/dev/kni opened\n"); + + return 0; +} + +static int +kni_release(struct inode *inode, struct file *file) +{ + struct net *net = file->private_data; + struct kni_net *knet = net_generic(net, kni_net_id); + struct kni_dev *dev, *n; + + /* Stop kernel thread for single mode */ + if (multiple_kthread_on == 0) { + /* Stop kernel thread */ + kthread_stop(knet->kni_kthread); + knet->kni_kthread = NULL; + } + + down_write(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + /* Stop kernel thread for multiple mode */ + if (multiple_kthread_on && dev->pthread != NULL) { + kthread_stop(dev->pthread); + dev->pthread = NULL; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_backend_release(dev); +#endif + kni_dev_remove(dev); + list_del(&dev->list); + } + up_write(&knet->kni_list_lock); + + /* Clear the bit of device in use */ + clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + + put_net(net); + KNI_PRINT("/dev/kni closed\n"); + + return 0; +} + +static int +kni_thread_single(void *data) +{ + struct kni_net *knet = data; + int j; + struct kni_dev *dev; + + while (!kthread_should_stop()) { + down_read(&knet->kni_list_lock); + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { + list_for_each_entry(dev, &knet->kni_list_head, list) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); +#else + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } + } + up_read(&knet->kni_list_lock); +#ifdef RTE_KNI_PREEMPT_DEFAULT + /* reschedule out for a while */ + schedule_timeout_interruptible(usecs_to_jiffies( \ + KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } + + return 0; +} + +static int +kni_thread_multiple(void *param) +{ + int j; + struct kni_dev *dev = (struct kni_dev *)param; + + while (!kthread_should_stop()) { + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); +#else + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } +#ifdef RTE_KNI_PREEMPT_DEFAULT + schedule_timeout_interruptible(usecs_to_jiffies( \ + KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } + + return 0; +} + +static int +kni_dev_remove(struct kni_dev *dev) +{ + if (!dev) + return -ENODEV; + + switch (dev->device_id) { + #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): + #include + igb_kni_remove(dev->pci_dev); + break; + #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev): + #include + ixgbe_kni_remove(dev->pci_dev); + break; + default: + break; + } + + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } + + return 0; +} + +static int +kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) +{ + if (!kni || !dev) + return -1; + + /* Check if network name has been used */ + if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { + KNI_ERR("KNI name %s duplicated\n", dev->name); + return -1; + } + + return 0; +} + +static int +kni_ioctl_create(struct net *net, + unsigned int ioctl_num, unsigned long ioctl_param) +{ + struct kni_net *knet = net_generic(net, kni_net_id); + int ret; + struct rte_kni_device_info dev_info; + struct pci_dev *pci = NULL; + struct pci_dev *found_pci = NULL; + struct net_device *net_dev = NULL; + struct net_device *lad_dev = NULL; + struct kni_dev *kni, *dev, *n; + + printk(KERN_INFO "KNI: Creating kni...\n"); + /* Check the buffer size, to avoid warning */ + if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) + return -EINVAL; + + /* Copy kni info from user space */ + ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); + if (ret) { + KNI_ERR("copy_from_user in kni_ioctl_create"); + return -EIO; + } + + /** + * Check if the cpu core id is valid for binding, + * for multiple kernel thread mode. + */ + if (multiple_kthread_on && dev_info.force_bind && + !cpu_online(dev_info.core_id)) { + KNI_ERR("cpu %u is not online\n", dev_info.core_id); + return -EINVAL; + } + + /* Check if it has been created */ + down_read(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + if (kni_check_param(dev, &dev_info) < 0) { + up_read(&knet->kni_list_lock); + return -EINVAL; + } + } + up_read(&knet->kni_list_lock); + + net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, +#ifdef NET_NAME_UNKNOWN + NET_NAME_UNKNOWN, +#endif + kni_net_init); + if (net_dev == NULL) { + KNI_ERR("error allocating device \"%s\"\n", dev_info.name); + return -EBUSY; + } + + dev_net_set(net_dev, net); + + kni = netdev_priv(net_dev); + + kni->net_dev = net_dev; + kni->group_id = dev_info.group_id; + kni->core_id = dev_info.core_id; + strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); + + /* Translate user space info into kernel space info */ + kni->tx_q = phys_to_virt(dev_info.tx_phys); + kni->rx_q = phys_to_virt(dev_info.rx_phys); + kni->alloc_q = phys_to_virt(dev_info.alloc_phys); + kni->free_q = phys_to_virt(dev_info.free_phys); + + kni->req_q = phys_to_virt(dev_info.req_phys); + kni->resp_q = phys_to_virt(dev_info.resp_phys); + kni->sync_va = dev_info.sync_va; + kni->sync_kva = phys_to_virt(dev_info.sync_phys); + + kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys); + kni->mbuf_va = dev_info.mbuf_va; + +#ifdef RTE_KNI_VHOST + kni->vhost_queue = NULL; + kni->vq_status = BE_STOP; +#endif + kni->mbuf_size = dev_info.mbuf_size; + + KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", + (unsigned long long) dev_info.tx_phys, kni->tx_q); + KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", + (unsigned long long) dev_info.rx_phys, kni->rx_q); + KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", + (unsigned long long) dev_info.alloc_phys, kni->alloc_q); + KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n", + (unsigned long long) dev_info.free_phys, kni->free_q); + KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n", + (unsigned long long) dev_info.req_phys, kni->req_q); + KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", + (unsigned long long) dev_info.resp_phys, kni->resp_q); + KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n", + (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva); + KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va); + KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); + + KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n", + dev_info.bus, + dev_info.devid, + dev_info.function, + dev_info.vendor_id, + dev_info.device_id); + + pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); + + /* Support Ethtool */ + while (pci) { + KNI_PRINT("pci_bus: %02x:%02x:%02x \n", + pci->bus->number, + PCI_SLOT(pci->devfn), + PCI_FUNC(pci->devfn)); + + if ((pci->bus->number == dev_info.bus) && + (PCI_SLOT(pci->devfn) == dev_info.devid) && + (PCI_FUNC(pci->devfn) == dev_info.function)) { + found_pci = pci; + switch (dev_info.device_id) { + #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): + #include + ret = igb_kni_probe(found_pci, &lad_dev); + break; + #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \ + case (dev): + #include + ret = ixgbe_kni_probe(found_pci, &lad_dev); + break; + default: + ret = -1; + break; + } + + KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n", + pci, lad_dev); + if (ret == 0) { + kni->lad_dev = lad_dev; + kni_set_ethtool_ops(kni->net_dev); + } else { + KNI_ERR("Device not supported by ethtool"); + kni->lad_dev = NULL; + } + + kni->pci_dev = found_pci; + kni->device_id = dev_info.device_id; + break; + } + pci = pci_get_device(dev_info.vendor_id, + dev_info.device_id, pci); + } + if (pci) + pci_dev_put(pci); + + ret = register_netdev(net_dev); + if (ret) { + KNI_ERR("error %i registering device \"%s\"\n", + ret, dev_info.name); + kni_dev_remove(kni); + return -ENODEV; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_init(kni); +#endif + + /** + * Create a new kernel thread for multiple mode, set its core affinity, + * and finally wake it up. + */ + if (multiple_kthread_on) { + kni->pthread = kthread_create(kni_thread_multiple, + (void *)kni, + "kni_%s", kni->name); + if (IS_ERR(kni->pthread)) { + kni_dev_remove(kni); + return -ECANCELED; + } + if (dev_info.force_bind) + kthread_bind(kni->pthread, kni->core_id); + wake_up_process(kni->pthread); + } + + down_write(&knet->kni_list_lock); + list_add(&kni->list, &knet->kni_list_head); + up_write(&knet->kni_list_lock); + + return 0; +} + +static int +kni_ioctl_release(struct net *net, + unsigned int ioctl_num, unsigned long ioctl_param) +{ + struct kni_net *knet = net_generic(net, kni_net_id); + int ret = -EINVAL; + struct kni_dev *dev, *n; + struct rte_kni_device_info dev_info; + + if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) + return -EINVAL; + + ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); + if (ret) { + KNI_ERR("copy_from_user in kni_ioctl_release"); + return -EIO; + } + + /* Release the network device according to its name */ + if (strlen(dev_info.name) == 0) + return ret; + + down_write(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0) + continue; + + if (multiple_kthread_on && dev->pthread != NULL) { + kthread_stop(dev->pthread); + dev->pthread = NULL; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_backend_release(dev); +#endif + kni_dev_remove(dev); + list_del(&dev->list); + ret = 0; + break; + } + up_write(&knet->kni_list_lock); + printk(KERN_INFO "KNI: %s release kni named %s\n", + (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name); + + return ret; +} + +static int +kni_ioctl(struct inode *inode, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + int ret = -EINVAL; + struct net *net = current->nsproxy->net_ns; + + KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); + + /* + * Switch according to the ioctl called + */ + switch (_IOC_NR(ioctl_num)) { + case _IOC_NR(RTE_KNI_IOCTL_TEST): + /* For test only, not used */ + break; + case _IOC_NR(RTE_KNI_IOCTL_CREATE): + ret = kni_ioctl_create(net, ioctl_num, ioctl_param); + break; + case _IOC_NR(RTE_KNI_IOCTL_RELEASE): + ret = kni_ioctl_release(net, ioctl_num, ioctl_param); + break; + default: + KNI_DBG("IOCTL default\n"); + break; + } + + return ret; +} + +static int +kni_compat_ioctl(struct inode *inode, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + /* 32 bits app on 64 bits OS to be supported later */ + KNI_PRINT("Not implemented.\n"); + + return -EINVAL; +} + +module_init(kni_init); +module_exit(kni_exit); + +module_param(lo_mode, charp, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(lo_mode, +"KNI loopback mode (default=lo_mode_none):\n" +" lo_mode_none Kernel loopback disabled\n" +" lo_mode_fifo Enable kernel loopback with fifo\n" +" lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n" +"\n" +); + +module_param(kthread_mode, charp, S_IRUGO); +MODULE_PARM_DESC(kthread_mode, +"Kernel thread mode (default=single):\n" +" single Single kernel thread mode enabled.\n" +" multiple Multiple kernel thread mode enabled.\n" +"\n" +); diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c new file mode 100644 index 00000000..cfa83398 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -0,0 +1,706 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +/* + * This code is inspired from the book "Linux Device Drivers" by + * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates + */ + +#include +#include +#include +#include +#include /* eth_type_trans */ +#include +#include +#include + +#include +#include +#include "kni_dev.h" + +#define WD_TIMEOUT 5 /*jiffies */ + +#define MBUF_BURST_SZ 32 + +#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ + +/* typedef for rx function */ +typedef void (*kni_net_rx_t)(struct kni_dev *kni); + +static int kni_net_tx(struct sk_buff *skb, struct net_device *dev); +static void kni_net_rx_normal(struct kni_dev *kni); +static void kni_net_rx_lo_fifo(struct kni_dev *kni); +static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni); +static int kni_net_process_request(struct kni_dev *kni, + struct rte_kni_request *req); + +/* kni rx function pointer, with default to normal rx */ +static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; + +/* + * Open and close + */ +static int +kni_net_open(struct net_device *dev) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + if (kni->lad_dev) + memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); + else + /* + * Generate random mac address. eth_random_addr() is the newer + * version of generating mac address in linux kernel. + */ + random_ether_addr(dev->dev_addr); + + netif_start_queue(dev); + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; + + /* Setting if_up to non-zero means up */ + req.if_up = 1; + ret = kni_net_process_request(kni, &req); + + return (ret == 0) ? req.result : ret; +} + +static int +kni_net_release(struct net_device *dev) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + netif_stop_queue(dev); /* can't transmit any more */ + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; + + /* Setting if_up to 0 means down */ + req.if_up = 0; + ret = kni_net_process_request(kni, &req); + + return (ret == 0) ? req.result : ret; +} + +/* + * Configuration changes (passed on by ifconfig) + */ +static int +kni_net_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* ignore other fields */ + return 0; +} + +/* + * RX: normal working mode + */ +static void +kni_net_rx_normal(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num_rx, num_fq; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct sk_buff *skb; + struct net_device *dev = kni->net_dev; + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + if (num_fq == 0) { + /* No room on the free_q, bail out */ + return; + } + + /* Calculate the number of entries to dequeue from rx_q */ + num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ); + + /* Burst dequeue from rx_q */ + num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx); + if (num_rx == 0) + return; + + /* Transfer received packets to netif */ + for (i = 0; i < num_rx; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->data_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + skb = dev_alloc_skb(len + 2); + if (!skb) { + KNI_ERR("Out of mem, dropping pkts\n"); + /* Update statistics */ + kni->stats.rx_dropped++; + } + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Call netif interface */ + netif_rx_ni(skb); + + /* Update statistics */ + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + } + } + + /* Burst enqueue mbufs into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)va, num_rx); + if (ret != num_rx) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into free_q\n"); +} + +/* + * RX: loopback with enqueue/dequeue fifos. + */ +static void +kni_net_rx_lo_fifo(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num, num_rq, num_tq, num_aq, num_fq; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct rte_kni_mbuf *alloc_kva; + struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ]; + void *alloc_data_kva; + + /* Get the number of entries in rx_q */ + num_rq = kni_fifo_count(kni->rx_q); + + /* Get the number of free entrie in tx_q */ + num_tq = kni_fifo_free_count(kni->tx_q); + + /* Get the number of entries in alloc_q */ + num_aq = kni_fifo_count(kni->alloc_q); + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + + /* Calculate the number of entries to be dequeued from rx_q */ + num = min(num_rq, num_tq); + num = min(num, num_aq); + num = min(num, num_fq); + num = min(num, (unsigned)MBUF_BURST_SZ); + + /* Return if no entry to dequeue from rx_q */ + if (num == 0) + return; + + /* Burst dequeue from rx_q */ + ret = kni_fifo_get(kni->rx_q, (void **)va, num); + if (ret == 0) + return; /* Failing should not happen */ + + /* Dequeue entries from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num); + if (ret) { + num = ret; + /* Copy mbufs */ + for (i = 0; i < num; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->pkt_len; + data_kva = kva->buf_addr + kva->data_off - + kni->mbuf_va + kni->mbuf_kva; + + alloc_kva = (void *)alloc_va[i] - kni->mbuf_va + + kni->mbuf_kva; + alloc_data_kva = alloc_kva->buf_addr + + alloc_kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + memcpy(alloc_data_kva, data_kva, len); + alloc_kva->pkt_len = len; + alloc_kva->data_len = len; + + kni->stats.tx_bytes += len; + kni->stats.rx_bytes += len; + } + + /* Burst enqueue mbufs into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into tx_q\n"); + } + + /* Burst enqueue mbufs into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into free_q\n"); + + /** + * Update statistic, and enqueue/dequeue failure is impossible, + * as all queues are checked at first. + */ + kni->stats.tx_packets += num; + kni->stats.rx_packets += num; +} + +/* + * RX: loopback with enqueue/dequeue fifos and sk buffer copies. + */ +static void +kni_net_rx_lo_fifo_skb(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num_rq, num_fq, num; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct sk_buff *skb; + struct net_device *dev = kni->net_dev; + + /* Get the number of entries in rx_q */ + num_rq = kni_fifo_count(kni->rx_q); + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + + /* Calculate the number of entries to dequeue from rx_q */ + num = min(num_rq, num_fq); + num = min(num, (unsigned)MBUF_BURST_SZ); + + /* Return if no entry to dequeue from rx_q */ + if (num == 0) + return; + + /* Burst dequeue mbufs from rx_q */ + ret = kni_fifo_get(kni->rx_q, (void **)va, num); + if (ret == 0) + return; + + /* Copy mbufs to sk buffer and then call tx interface */ + for (i = 0; i < num; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->data_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + skb = dev_alloc_skb(len + 2); + if (skb == NULL) + KNI_ERR("Out of mem, dropping pkts\n"); + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + dev_kfree_skb(skb); + } + + /* Simulate real usage, allocate/copy skb twice */ + skb = dev_alloc_skb(len + 2); + if (skb == NULL) { + KNI_ERR("Out of mem, dropping pkts\n"); + kni->stats.rx_dropped++; + } + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + + /* call tx interface */ + kni_net_tx(skb, dev); + } + } + + /* enqueue all the mbufs from rx_q into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)&va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into free_q\n"); +} + +/* rx interface */ +void +kni_net_rx(struct kni_dev *kni) +{ + /** + * It doesn't need to check if it is NULL pointer, + * as it has a default value + */ + (*kni_net_rx_func)(kni); +} + +/* + * Transmit a packet (called by the kernel) + */ +#ifdef RTE_KNI_VHOST +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#else +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + int len = 0; + unsigned ret; + struct kni_dev *kni = netdev_priv(dev); + struct rte_kni_mbuf *pkt_kva = NULL; + struct rte_kni_mbuf *pkt_va = NULL; + + dev->trans_start = jiffies; /* save the timestamp */ + + /* Check if the length of skb is less than mbuf size */ + if (skb->len > kni->mbuf_size) + goto drop; + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. + */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; + data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + len = skb->len; + memcpy(data_kva, skb->data, len); + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return NETDEV_TX_OK; + +drop: + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#endif + +/* + * Deal with a transmit timeout. + */ +static void +kni_net_tx_timeout (struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev->trans_start); + + kni->stats.tx_errors++; + netif_wake_queue(dev); + return; +} + +/* + * Ioctl commands + */ +static int +kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + KNI_DBG("kni_net_ioctl %d\n", + ((struct kni_dev *)netdev_priv(dev))->group_id); + + return 0; +} + +static void +kni_net_set_rx_mode(struct net_device *dev) +{ +} + +static int +kni_net_change_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CHANGE_MTU; + req.new_mtu = new_mtu; + ret = kni_net_process_request(kni, &req); + if (ret == 0 && req.result == 0) + dev->mtu = new_mtu; + + return (ret == 0) ? req.result : ret; +} + +/* + * Checks if the user space application provided the resp message + */ +void +kni_net_poll_resp(struct kni_dev *kni) +{ + if (kni_fifo_count(kni->resp_q)) + wake_up_interruptible(&kni->wq); +} + +/* + * It can be called to process the request. + */ +static int +kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) +{ + int ret = -1; + void *resp_va; + unsigned num; + int ret_val; + + if (!kni || !req) { + KNI_ERR("No kni instance or request\n"); + return -EINVAL; + } + + mutex_lock(&kni->sync_lock); + + /* Construct data */ + memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); + num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); + if (num < 1) { + KNI_ERR("Cannot send to req_q\n"); + ret = -EBUSY; + goto fail; + } + + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { + ret = -ETIME; + goto fail; + } + num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); + if (num != 1 || resp_va != kni->sync_va) { + /* This should never happen */ + KNI_ERR("No data in resp_q\n"); + ret = -ENODATA; + goto fail; + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); + ret = 0; + +fail: + mutex_unlock(&kni->sync_lock); + return ret; +} + +/* + * Return statistics to the caller + */ +static struct net_device_stats * +kni_net_stats(struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + return &kni->stats; +} + +/* + * Fill the eth header + */ +static int +kni_net_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + + memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len); + eth->h_proto = htons(type); + + return dev->hard_header_len; +} + + +/* + * Re-fill the eth header + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +static int +kni_net_rebuild_header(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct ethhdr *eth = (struct ethhdr *) skb->data; + + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, dev->dev_addr, dev->addr_len); + + return 0; +} +#endif /* < 4.1.0 */ + +/** + * kni_net_set_mac - Change the Ethernet Address of the KNI NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int kni_net_set_mac(struct net_device *netdev, void *p) +{ + struct sockaddr *addr = p; + if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) + return -EADDRNOTAVAIL; + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + return 0; +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) +{ + if (new_carrier) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + return 0; +} +#endif + +static const struct header_ops kni_net_header_ops = { + .create = kni_net_header, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) + .rebuild = kni_net_rebuild_header, +#endif /* < 4.1.0 */ + .cache = NULL, /* disable caching */ +}; + +static const struct net_device_ops kni_net_netdev_ops = { + .ndo_open = kni_net_open, + .ndo_stop = kni_net_release, + .ndo_set_config = kni_net_config, + .ndo_start_xmit = kni_net_tx, + .ndo_change_mtu = kni_net_change_mtu, + .ndo_do_ioctl = kni_net_ioctl, + .ndo_set_rx_mode = kni_net_set_rx_mode, + .ndo_get_stats = kni_net_stats, + .ndo_tx_timeout = kni_net_tx_timeout, + .ndo_set_mac_address = kni_net_set_mac, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) + .ndo_change_carrier = kni_net_change_carrier, +#endif +}; + +void +kni_net_init(struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("kni_net_init\n"); + + init_waitqueue_head(&kni->wq); + mutex_init(&kni->sync_lock); + + ether_setup(dev); /* assign some of the fields */ + dev->netdev_ops = &kni_net_netdev_ops; + dev->header_ops = &kni_net_header_ops; + dev->watchdog_timeo = WD_TIMEOUT; +} + +void +kni_net_config_lo_mode(char *lo_str) +{ + if (!lo_str) { + KNI_PRINT("loopback disabled"); + return; + } + + if (!strcmp(lo_str, "lo_mode_none")) + KNI_PRINT("loopback disabled"); + else if (!strcmp(lo_str, "lo_mode_fifo")) { + KNI_PRINT("loopback mode=lo_mode_fifo enabled"); + kni_net_rx_func = kni_net_rx_lo_fifo; + } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { + KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled"); + kni_net_rx_func = kni_net_rx_lo_fifo_skb; + } else + KNI_PRINT("Incognizant parameter, loopback disabled"); +} diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c new file mode 100644 index 00000000..a3ca8499 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c @@ -0,0 +1,857 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compat.h" +#include "kni_dev.h" +#include "kni_fifo.h" + +#define RX_BURST_SZ 4 + +extern void put_unused_fd(unsigned int fd); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) +extern struct file* +sock_alloc_file(struct socket *sock, + int flags, const char *dname); + +extern int get_unused_fd_flags(unsigned flags); + +extern void fd_install(unsigned int fd, struct file *file); + +static int kni_sock_map_fd(struct socket *sock) +{ + struct file *file; + int fd = get_unused_fd_flags(0); + if (fd < 0) + return fd; + + file = sock_alloc_file(sock, 0, NULL); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); + } + fd_install(fd, file); + return fd; +} +#else +#define kni_sock_map_fd(s) sock_map_fd(s, 0) +#endif + +static struct proto kni_raw_proto = { + .name = "kni_vhost", + .owner = THIS_MODULE, + .obj_size = sizeof(struct kni_vhost_queue), +}; + +static inline int +kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, + unsigned offset, unsigned len) +{ + struct rte_kni_mbuf *pkt_kva = NULL; + struct rte_kni_mbuf *pkt_va = NULL; + int ret; + + KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + offset, len, (int)m->msg_iter.iov->iov_len); +#else + offset, len, (int)m->msg_iov->iov_len); +#endif + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. + */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; + data_kva = pkt_kva->buf_addr + pkt_kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + +#ifdef HAVE_IOV_ITER_MSGHDR + copy_from_iter(data_kva, len, &m->msg_iter); +#else + memcpy_fromiovecend(data_kva, m->msg_iov, offset, len); +#endif + + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* update statistics */ + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return 0; + +drop: + /* update statistics */ + kni->stats.tx_dropped++; + + return 0; +} + +static inline int +kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, + unsigned offset, unsigned len) +{ + uint32_t pkt_len; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va; + void * data_kva; + struct sk_buff *skb; + struct kni_vhost_queue *q = kni->vhost_queue; + + if (unlikely(q == NULL)) + return 0; + + /* ensure at least one entry in free_q */ + if (unlikely(kni_fifo_free_count(kni->free_q) == 0)) + return 0; + + skb = skb_dequeue(&q->sk.sk_receive_queue); + if (unlikely(skb == NULL)) + return 0; + + kva = (struct rte_kni_mbuf*)skb->data; + + /* free skb to cache */ + skb->data = NULL; + if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1))) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into rx cache fifo\n"); + + pkt_len = kva->data_len; + if (unlikely(pkt_len > len)) + goto drop; + + KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); +#else + offset, len, pkt_len, (int)m->msg_iov->iov_len); +#endif + + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; +#ifdef HAVE_IOV_ITER_MSGHDR + if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter))) +#else + if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len))) +#endif + goto drop; + + /* Update statistics */ + kni->stats.rx_bytes += pkt_len; + kni->stats.rx_packets++; + + /* enqueue mbufs into free_q */ + va = (void*)kva - kni->mbuf_kva + kni->mbuf_va; + if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1))) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into free_q\n"); + + KNI_DBG_RX("receive done %d\n", pkt_len); + + return pkt_len; + +drop: + /* Update drop statistics */ + kni->stats.rx_dropped++; + + return 0; +} + +static unsigned int +kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + unsigned int mask = 0; + + if (unlikely(q == NULL || q->kni == NULL)) + return POLLERR; + + kni = q->kni; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n", + kni->group_id, (uint64_t)sock->wq); +#else + KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n", + kni->group_id, (uint64_t)&sock->wait); +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + poll_wait(file, &sock->wq->wait, wait); +#else + poll_wait(file, &sock->wait, wait); +#endif + + if (kni_fifo_count(kni->rx_q) > 0) + mask |= POLLIN | POLLRDNORM; + + if (sock_writeable(&q->sk) || +#ifdef SOCKWQ_ASYNC_NOSPACE + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && +#else + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && +#endif + sock_writeable(&q->sk))) + mask |= POLLOUT | POLLWRNORM; + + return mask; +} + +static inline void +kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, + struct sk_buff *skb, struct rte_kni_mbuf *va) +{ + struct rte_kni_mbuf *kva; + + kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; + (skb)->data = (unsigned char*)kva; + (skb)->len = kva->data_len; + skb_queue_tail(&q->sk.sk_receive_queue, skb); +} + +static inline void +kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, + struct sk_buff **skb, struct rte_kni_mbuf **va) +{ + int i; + for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) + kni_vhost_enqueue(kni, q, *skb, *va); +} + +int +kni_chk_vhost_rx(struct kni_dev *kni) +{ + struct kni_vhost_queue *q = kni->vhost_queue; + unsigned nb_in, nb_mbuf, nb_skb; + const unsigned BURST_MASK = RX_BURST_SZ - 1; + unsigned nb_burst, nb_backlog, i; + struct sk_buff *skb[RX_BURST_SZ]; + struct rte_kni_mbuf *va[RX_BURST_SZ]; + + if (unlikely(BE_STOP & kni->vq_status)) { + kni->vq_status |= BE_FINISH; + return 0; + } + + if (unlikely(q == NULL)) + return 0; + + nb_skb = kni_fifo_count(q->fifo); + nb_mbuf = kni_fifo_count(kni->rx_q); + + nb_in = min(nb_mbuf, nb_skb); + nb_in = min(nb_in, (unsigned)RX_BURST_SZ); + nb_burst = (nb_in & ~BURST_MASK); + nb_backlog = (nb_in & BURST_MASK); + + /* enqueue skb_queue per BURST_SIZE bulk */ + if (0 != nb_burst) { + if (unlikely(RX_BURST_SZ != kni_fifo_get( + kni->rx_q, (void **)&va, + RX_BURST_SZ))) + goto except; + + if (unlikely(RX_BURST_SZ != kni_fifo_get( + q->fifo, (void **)&skb, + RX_BURST_SZ))) + goto except; + + kni_vhost_enqueue_burst(kni, q, skb, va); + } + + /* all leftover, do one by one */ + for (i = 0; i < nb_backlog; ++i) { + if (unlikely(1 != kni_fifo_get( + kni->rx_q,(void **)&va, 1))) + goto except; + + if (unlikely(1 != kni_fifo_get( + q->fifo, (void **)&skb, 1))) + goto except; + + kni_vhost_enqueue(kni, q, *skb, *va); + } + + /* Ondemand wake up */ + if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) || + ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { + wake_up_interruptible_poll(sk_sleep(&q->sk), + POLLIN | POLLRDNORM | POLLRDBAND); + KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", + nb_mbuf, nb_skb, nb_in); + } + + return 0; + +except: + /* Failing should not happen */ + KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n"); + BUG_ON(1); + + return 0; +} + +static int +#ifdef HAVE_KIOCB_MSG_PARAM +kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +#else +kni_sock_sndmsg(struct socket *sock, + struct msghdr *m, size_t total_len) +#endif /* HAVE_KIOCB_MSG_PARAM */ +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + int vnet_hdr_len = 0; + unsigned long len = total_len; + + if (unlikely(q == NULL || q->kni == NULL)) + return 0; + + KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + len, q->flags, (int)m->msg_iter.iov->iov_len); +#else + len, q->flags, (int)m->msg_iovlen); +#endif + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + if (likely(q->flags & IFF_VNET_HDR)) { + vnet_hdr_len = q->vnet_hdr_sz; + if (unlikely(len < vnet_hdr_len)) + return -EINVAL; + len -= vnet_hdr_len; + } +#endif + + if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz)) + return -EINVAL; + + return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len); +} + +static int +#ifdef HAVE_KIOCB_MSG_PARAM +kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t len, int flags) +#else +kni_sock_rcvmsg(struct socket *sock, + struct msghdr *m, size_t len, int flags) +#endif /* HAVE_KIOCB_MSG_PARAM */ +{ + int vnet_hdr_len = 0; + int pkt_len = 0; + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + static struct virtio_net_hdr + __attribute__ ((unused)) vnet_hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + if (unlikely(q == NULL || q->kni == NULL)) + return 0; + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + if (likely(q->flags & IFF_VNET_HDR)) { + vnet_hdr_len = q->vnet_hdr_sz; + if ((len -= vnet_hdr_len) < 0) + return -EINVAL; + } +#endif + + if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni, + m, vnet_hdr_len, len)))) + return 0; + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + /* no need to copy hdr when no pkt received */ +#ifdef HAVE_IOV_ITER_MSGHDR + if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len, + &m->msg_iter))) +#else + if (unlikely(memcpy_toiovecend(m->msg_iov, + (void *)&vnet_hdr, 0, vnet_hdr_len))) +#endif /* HAVE_IOV_ITER_MSGHDR */ + return -EFAULT; +#endif /* RTE_KNI_VHOST_VNET_HDR_EN */ + KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", + (unsigned long)len, q->flags, pkt_len); + + return pkt_len + vnet_hdr_len; +} + +/* dummy tap like ioctl */ +static int +kni_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct ifreq __user *ifr = argp; + unsigned int __user *up = argp; + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + unsigned int u; + int __user *sp = argp; + int s; + int ret; + + KNI_DBG("tap ioctl cmd 0x%08x\n", cmd); + + switch (cmd) { + case TUNSETIFF: + KNI_DBG("TUNSETIFF\n"); + /* ignore the name, just look at flags */ + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + + ret = 0; + if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) + ret = -EINVAL; + else + q->flags = u; + + return ret; + + case TUNGETIFF: + KNI_DBG("TUNGETIFF\n"); + rcu_read_lock_bh(); + kni = rcu_dereference_bh(q->kni); + if (kni) + dev_hold(kni->net_dev); + rcu_read_unlock_bh(); + + if (!kni) + return -ENOLINK; + + ret = 0; + if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) || + put_user(q->flags, &ifr->ifr_flags)) + ret = -EFAULT; + dev_put(kni->net_dev); + return ret; + + case TUNGETFEATURES: + KNI_DBG("TUNGETFEATURES\n"); + u = IFF_TAP | IFF_NO_PI; +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + u |= IFF_VNET_HDR; +#endif + if (put_user(u, up)) + return -EFAULT; + return 0; + + case TUNSETSNDBUF: + KNI_DBG("TUNSETSNDBUF\n"); + if (get_user(u, up)) + return -EFAULT; + + q->sk.sk_sndbuf = u; + return 0; + + case TUNGETVNETHDRSZ: + s = q->vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + KNI_DBG("TUNGETVNETHDRSZ %d\n", s); + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + KNI_DBG("TUNSETVNETHDRSZ %d\n", s); + q->vnet_hdr_sz = s; + return 0; + + case TUNSETOFFLOAD: + KNI_DBG("TUNSETOFFLOAD %lx\n", arg); +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + /* not support any offload yet */ + if (!(q->flags & IFF_VNET_HDR)) + return -EINVAL; + + return 0; +#else + return -EINVAL; +#endif + + default: + KNI_DBG("NOT SUPPORT\n"); + return -EINVAL; + } +} + +static int +kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* 32 bits app on 64 bits OS to be supported later */ + KNI_PRINT("Not implemented.\n"); + + return -EINVAL; +} + +#define KNI_VHOST_WAIT_WQ_SAFE() \ +do { \ + while ((BE_FINISH | BE_STOP) == kni->vq_status) \ + msleep(1); \ +}while(0) \ + + +static int +kni_sock_release(struct socket *sock) +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + + if (q == NULL) + return 0; + + if (NULL != (kni = q->kni)) { + kni->vq_status = BE_STOP; + KNI_VHOST_WAIT_WQ_SAFE(); + kni->vhost_queue = NULL; + q->kni = NULL; + } + + if (q->sockfd != -1) + q->sockfd = -1; + + sk_set_socket(&q->sk, NULL); + sock->sk = NULL; + + sock_put(&q->sk); + + KNI_DBG("dummy sock release done\n"); + + return 0; +} + +int +kni_sock_getname (struct socket *sock, + struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + KNI_DBG("dummy sock getname\n"); + ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET; + return 0; +} + +static const struct proto_ops kni_socket_ops = { + .getname = kni_sock_getname, + .sendmsg = kni_sock_sndmsg, + .recvmsg = kni_sock_rcvmsg, + .release = kni_sock_release, + .poll = kni_sock_poll, + .ioctl = kni_sock_ioctl, + .compat_ioctl = kni_sock_compat_ioctl, +}; + +static void +kni_sk_write_space(struct sock *sk) +{ + wait_queue_head_t *wqueue; + + if (!sock_writeable(sk) || +#ifdef SOCKWQ_ASYNC_NOSPACE + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) +#else + !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) +#endif + return; + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_poll( + wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); +} + +static void +kni_sk_destruct(struct sock *sk) +{ + struct kni_vhost_queue *q = + container_of(sk, struct kni_vhost_queue, sk); + + if (!q) + return; + + /* make sure there's no packet in buffer */ + while (skb_dequeue(&sk->sk_receive_queue) != NULL) + ; + + mb(); + + if (q->fifo != NULL) { + kfree(q->fifo); + q->fifo = NULL; + } + + if (q->cache != NULL) { + kfree(q->cache); + q->cache = NULL; + } +} + +static int +kni_vhost_backend_init(struct kni_dev *kni) +{ + struct kni_vhost_queue *q; + struct net *net = current->nsproxy->net_ns; + int err, i, sockfd; + struct rte_kni_fifo *fifo; + struct sk_buff *elem; + + if (kni->vhost_queue != NULL) + return -1; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) + q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &kni_raw_proto, 0); +#else + q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &kni_raw_proto); +#endif + if (!q) + return -ENOMEM; + + err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock); + if (err) + goto free_sk; + + sockfd = kni_sock_map_fd(q->sock); + if (sockfd < 0) { + err = sockfd; + goto free_sock; + } + + /* cache init */ + q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), + GFP_KERNEL); + if (!q->cache) + goto free_fd; + + fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *) + + sizeof(struct rte_kni_fifo), GFP_KERNEL); + if (!fifo) + goto free_cache; + + kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE); + + for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { + elem = &q->cache[i]; + kni_fifo_put(fifo, (void**)&elem, 1); + } + q->fifo = fifo; + + /* store sockfd in vhost_queue */ + q->sockfd = sockfd; + + /* init socket */ + q->sock->type = SOCK_RAW; + q->sock->state = SS_CONNECTED; + q->sock->ops = &kni_socket_ops; + sock_init_data(q->sock, &q->sk); + + /* init sock data */ + q->sk.sk_write_space = kni_sk_write_space; + q->sk.sk_destruct = kni_sk_destruct; + q->flags = IFF_NO_PI | IFF_TAP; + q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + q->flags |= IFF_VNET_HDR; +#endif + + /* bind kni_dev with vhost_queue */ + q->kni = kni; + kni->vhost_queue = q; + + wmb(); + + kni->vq_status = BE_START; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx," + "sk->sk_wq=0x%16llx", + q->sockfd, (uint64_t)q->sock->wq, + (uint64_t)q->sk.sk_wq); +#else + KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx," + "sk->sk_sleep=0x%16llx", + q->sockfd, (uint64_t)&q->sock->wait, + (uint64_t)q->sk.sk_sleep); +#endif + + return 0; + +free_cache: + kfree(q->cache); + q->cache = NULL; + +free_fd: + put_unused_fd(sockfd); + +free_sock: + q->kni = NULL; + kni->vhost_queue = NULL; + kni->vq_status |= BE_FINISH; + sock_release(q->sock); + q->sock->ops = NULL; + q->sock = NULL; + +free_sk: + sk_free((struct sock*)q); + + return err; +} + +/* kni vhost sock sysfs */ +static ssize_t +show_sock_fd(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + int sockfd = -1; + if (kni->vhost_queue != NULL) + sockfd = kni->vhost_queue->sockfd; + return snprintf(buf, 10, "%d\n", sockfd); +} + +static ssize_t +show_sock_en(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1)); +} + +static ssize_t +set_sock_en(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + unsigned long en; + int err = 0; + + if (0 != kstrtoul(buf, 0, &en)) + return -EINVAL; + + if (en) + err = kni_vhost_backend_init(kni); + + return err ? err : count; +} + +static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL); +static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); +static struct attribute *dev_attrs[] = { + &dev_attr_sock_fd.attr, + &dev_attr_sock_en.attr, + NULL, +}; + +static const struct attribute_group dev_attr_grp = { + .attrs = dev_attrs, +}; + +int +kni_vhost_backend_release(struct kni_dev *kni) +{ + struct kni_vhost_queue *q = kni->vhost_queue; + + if (q == NULL) + return 0; + + /* dettach from kni */ + q->kni = NULL; + + KNI_DBG("release backend done\n"); + + return 0; +} + +int +kni_vhost_init(struct kni_dev *kni) +{ + struct net_device *dev = kni->net_dev; + + if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp)) + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); + + kni->vq_status = BE_STOP; + + KNI_DBG("kni_vhost_init done\n"); + + return 0; +} diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile new file mode 100644 index 00000000..9d22fb97 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = rte_dom0_mm + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h +MODULE_CFLAGS += -Wall -Werror + +# this lib needs main eal +DEPDIRS-y += lib/librte_eal/linuxapp/eal + +# +# all source are stored in SRCS-y +# + +SRCS-y += dom0_mm_misc.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h new file mode 100644 index 00000000..e6eb97f2 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/compat.h @@ -0,0 +1,15 @@ +/* + * Minimal wrappers to allow compiling xen_dom0 on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h new file mode 100644 index 00000000..9d5ffb22 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef _DOM0_MM_DEV_H_ +#define _DOM0_MM_DEV_H_ + +#include +#include +#include +#include +#include + +#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ +#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ +#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) +#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) +#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ + +/** + * A structure describing the private information for a dom0 device. + */ +struct dom0_mm_dev { + struct miscdevice miscdev; + uint8_t fail_times; + uint32_t used_memsize; + uint32_t num_mem_ctx; + uint32_t config_memsize; + uint32_t num_bigblock; + struct dom0_mm_data *mm_data[NUM_MEM_CTX]; + struct mutex data_lock; +}; + +struct dom0_mm_data{ + uint32_t refcnt; + uint32_t num_memseg; /**< Number of memory segment. */ + uint32_t mem_size; /**< Size of requesting memory. */ + + char name[DOM0_NAME_MAX]; + + /** Store global memory block IDs used by an instance */ + uint32_t block_num[DOM0_NUM_MEMBLOCK]; + + /** Store memory block information.*/ + struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; + + /** Store memory segment information.*/ + struct memseg_info seg_info[DOM0_NUM_MEMSEG]; +}; + +#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) +#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) +#endif diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c new file mode 100644 index 00000000..79630bad --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c @@ -0,0 +1,780 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "dom0_mm_dev.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0"); + +static struct dom0_mm_dev dom0_dev; +static struct kobject *dom0_kobj = NULL; + +static struct memblock_info *rsv_mm_info; + +/* Default configuration for reserved memory size(2048 MB). */ +static uint32_t rsv_memsize = 2048; + +static int dom0_open(struct inode *inode, struct file *file); +static int dom0_release(struct inode *inode, struct file *file); +static int dom0_ioctl(struct file *file, unsigned int ioctl_num, + unsigned long ioctl_param); +static int dom0_mmap(struct file *file, struct vm_area_struct *vma); +static int dom0_memory_free(uint32_t size); +static int dom0_memory_release(struct dom0_mm_data *mm_data); + +static const struct file_operations data_fops = { + .owner = THIS_MODULE, + .open = dom0_open, + .release = dom0_release, + .mmap = dom0_mmap, + .unlocked_ioctl = (void *)dom0_ioctl, +}; + +static ssize_t +show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize); +} + +static ssize_t +show_memsize(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize); +} + +static ssize_t +store_memsize(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int err = 0; + unsigned long mem_size; + + if (0 != kstrtoul(buf, 0, &mem_size)) + return -EINVAL; + + mutex_lock(&dom0_dev.data_lock); + if (0 == mem_size) { + err = -EINVAL; + goto fail; + } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) { + XEN_ERR("configure memory size fail\n"); + err = -EINVAL; + goto fail; + } else + dom0_dev.config_memsize = mem_size; + +fail: + mutex_unlock(&dom0_dev.data_lock); + return err ? err : count; +} + +static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize); +static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL); + +static struct attribute *dev_attrs[] = { + &dev_attr_memsize.attr, + &dev_attr_memsize_rsvd.attr, + NULL, +}; + +/* the memory size unit is MB */ +static const struct attribute_group dev_attr_grp = { + .name = "memsize-mB", + .attrs = dev_attrs, +}; + + +static void +sort_viraddr(struct memblock_info *mb, int cnt) +{ + int i,j; + uint64_t tmp_pfn; + uint64_t tmp_viraddr; + + /*sort virtual address and pfn */ + for(i = 0; i < cnt; i ++) { + for(j = cnt - 1; j > i; j--) { + if(mb[j].pfn < mb[j - 1].pfn) { + tmp_pfn = mb[j - 1].pfn; + mb[j - 1].pfn = mb[j].pfn; + mb[j].pfn = tmp_pfn; + + tmp_viraddr = mb[j - 1].vir_addr; + mb[j - 1].vir_addr = mb[j].vir_addr; + mb[j].vir_addr = tmp_viraddr; + } + } + } +} + +static int +dom0_find_memdata(const char * mem_name) +{ + unsigned i; + int idx = -1; + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL) + continue; + if (!strncmp(dom0_dev.mm_data[i]->name, mem_name, + sizeof(char) * DOM0_NAME_MAX)) { + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_find_mempos(void) +{ + unsigned i; + int idx = -1; + + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL){ + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_memory_release(struct dom0_mm_data *mm_data) +{ + int idx; + uint32_t num_block, block_id; + + /* each memory block is 2M */ + num_block = mm_data->mem_size / SIZE_PER_BLOCK; + if (num_block == 0) + return -EINVAL; + + /* reset global memory data */ + idx = dom0_find_memdata(mm_data->name); + if (idx >= 0) { + dom0_dev.used_memsize -= mm_data->mem_size; + dom0_dev.mm_data[idx] = NULL; + dom0_dev.num_mem_ctx--; + } + + /* reset these memory blocks status as free */ + for (idx = 0; idx < num_block; idx++) { + block_id = mm_data->block_num[idx]; + rsv_mm_info[block_id].used = 0; + } + + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + vfree(mm_data); + return 0; +} + +static int +dom0_memory_free(uint32_t rsv_size) +{ + uint64_t vstart, vaddr; + uint32_t i, num_block, size; + + if (!xen_pv_domain()) + return -1; + + /* each memory block is 2M */ + num_block = rsv_size / SIZE_PER_BLOCK; + if (num_block == 0) + return -EINVAL; + + /* free all memory blocks of size of 4M and destroy contiguous region */ + for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) { + vstart = rsv_mm_info[i].vir_addr; + if (vstart) { + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(vstart, + DOM0_CONTIG_NUM_ORDER); + if (rsv_mm_info[i + 1].exchange_flag) + xen_destroy_contiguous_region(vstart + + DOM0_MEMBLOCK_SIZE, + DOM0_CONTIG_NUM_ORDER); + #else + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(rsv_mm_info[i].pfn + * PAGE_SIZE, + DOM0_CONTIG_NUM_ORDER); + if (rsv_mm_info[i + 1].exchange_flag) + xen_destroy_contiguous_region(rsv_mm_info[i].pfn + * PAGE_SIZE + DOM0_MEMBLOCK_SIZE, + DOM0_CONTIG_NUM_ORDER); + #endif + + size = DOM0_MEMBLOCK_SIZE * 2; + vaddr = vstart; + while (size > 0) { + ClearPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + free_pages(vstart, MAX_NUM_ORDER); + } + } + + /* free all memory blocks size of 2M and destroy contiguous region */ + for (; i < num_block; i++) { + vstart = rsv_mm_info[i].vir_addr; + if (vstart) { + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(vstart, + DOM0_CONTIG_NUM_ORDER); + + size = DOM0_MEMBLOCK_SIZE; + vaddr = vstart; + while (size > 0) { + ClearPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + free_pages(vstart, DOM0_CONTIG_NUM_ORDER); + } + } + + memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); + vfree(rsv_mm_info); + rsv_mm_info = NULL; + + return 0; +} + +static void +find_free_memory(uint32_t count, struct dom0_mm_data *mm_data) +{ + uint32_t i = 0; + uint32_t j = 0; + + while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) { + if (rsv_mm_info[j].used == 0) { + mm_data->block_info[i].pfn = rsv_mm_info[j].pfn; + mm_data->block_info[i].vir_addr = + rsv_mm_info[j].vir_addr; + mm_data->block_info[i].mfn = rsv_mm_info[j].mfn; + mm_data->block_info[i].exchange_flag = + rsv_mm_info[j].exchange_flag; + mm_data->block_num[i] = j; + rsv_mm_info[j].used = 1; + i++; + } + j++; + } +} + +/** + * Find all memory segments in which physical addresses are contiguous. + */ +static void +find_memseg(int count, struct dom0_mm_data * mm_data) +{ + int i = 0; + int j, k, idx = 0; + uint64_t zone_len, pfn, num_block; + + while(i < count) { + if (mm_data->block_info[i].exchange_flag == 0) { + i++; + continue; + } + k = 0; + pfn = mm_data->block_info[i].pfn; + mm_data->seg_info[idx].pfn = pfn; + mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn; + + for (j = i + 1; j < count; j++) { + + /* ignore exchange fail memory block */ + if (mm_data->block_info[j].exchange_flag == 0) + break; + + if (mm_data->block_info[j].pfn != + (mm_data->block_info[j - 1].pfn + + DOM0_MEMBLOCK_SIZE / PAGE_SIZE)) + break; + ++k; + mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn; + } + + num_block = j - i; + zone_len = num_block * DOM0_MEMBLOCK_SIZE; + mm_data->seg_info[idx].size = zone_len; + + XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len); + i = i+ num_block; + idx++; + if (idx == DOM0_NUM_MEMSEG) + break; + } + mm_data->num_memseg = idx; +} + +static int +dom0_memory_reserve(uint32_t rsv_size) +{ + uint64_t pfn, vstart, vaddr; + uint32_t i, num_block, size, allocated_size = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) + dma_addr_t dma_handle; +#endif + + /* 2M as memory block */ + num_block = rsv_size / SIZE_PER_BLOCK; + + rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block); + if (!rsv_mm_info) { + XEN_ERR("Unable to allocate device memory information\n"); + return -ENOMEM; + } + memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); + + /* try alloc size of 4M once */ + for (i = 0; i < num_block; i += 2) { + vstart = (unsigned long) + __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER); + if (vstart == 0) + break; + + dom0_dev.num_bigblock = i / 2 + 1; + allocated_size = SIZE_PER_BLOCK * (i + 2); + + /* size of 4M */ + size = DOM0_MEMBLOCK_SIZE * 2; + + vaddr = vstart; + while (size > 0) { + SetPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + pfn = virt_to_pfn(vstart); + rsv_mm_info[i].pfn = pfn; + rsv_mm_info[i].vir_addr = vstart; + rsv_mm_info[i + 1].pfn = + pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE; + rsv_mm_info[i + 1].vir_addr = + vstart + DOM0_MEMBLOCK_SIZE; + } + + /*if it failed to alloc 4M, and continue to alloc 2M once */ + for (; i < num_block; i++) { + vstart = (unsigned long) + __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER); + if (vstart == 0) { + XEN_ERR("allocate memory fail.\n"); + dom0_memory_free(allocated_size); + return -ENOMEM; + } + + allocated_size += SIZE_PER_BLOCK; + + size = DOM0_MEMBLOCK_SIZE; + vaddr = vstart; + while (size > 0) { + SetPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + pfn = virt_to_pfn(vstart); + rsv_mm_info[i].pfn = pfn; + rsv_mm_info[i].vir_addr = vstart; + } + + sort_viraddr(rsv_mm_info, num_block); + + for (i = 0; i< num_block; i++) { + + /* + * This API is used to exchage MFN for getting a block of + * contiguous physical addresses, its maximum size is 2M. + */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) + if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr, + DOM0_CONTIG_NUM_ORDER, 0) == 0) { + #else + if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE, + DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) { + #endif + rsv_mm_info[i].exchange_flag = 1; + rsv_mm_info[i].mfn = + pfn_to_mfn(rsv_mm_info[i].pfn); + rsv_mm_info[i].used = 0; + } else { + XEN_ERR("exchange memeory fail\n"); + rsv_mm_info[i].exchange_flag = 0; + dom0_dev.fail_times++; + if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) { + dom0_memory_free(rsv_size); + return -EFAULT; + } + } + } + + return 0; +} + +static int +dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data) +{ + uint32_t num_block; + int idx; + + /* check if there is a free name buffer */ + memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX); + mm_data->name[DOM0_NAME_MAX - 1] = '\0'; + idx = dom0_find_mempos(); + if (idx < 0) + return -1; + + num_block = meminfo->size / SIZE_PER_BLOCK; + /* find free memory and new memory segments*/ + find_free_memory(num_block, mm_data); + find_memseg(num_block, mm_data); + + /* update private memory data */ + mm_data->refcnt++; + mm_data->mem_size = meminfo->size; + + /* update global memory data */ + dom0_dev.mm_data[idx] = mm_data; + dom0_dev.num_mem_ctx++; + dom0_dev.used_memsize += mm_data->mem_size; + + return 0; +} + +static int +dom0_check_memory (struct memory_info *meminfo) +{ + int idx; + uint64_t mem_size; + + /* round memory size to the next even number. */ + if (meminfo->size % 2) + ++meminfo->size; + + mem_size = meminfo->size; + if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) { + XEN_ERR("Memory data space is full in Dom0 driver\n"); + return -1; + } + idx = dom0_find_memdata(meminfo->name); + if (idx >= 0) { + XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n", + meminfo->name); + return -1; + } + if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) { + XEN_ERR("Total size can't be larger than reserved size.\n"); + return -1; + } + + return 0; +} + +static int __init +dom0_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + if (rsv_memsize > DOM0_CONFIG_MEMSIZE) { + XEN_ERR("The reserved memory size cannot be greater than %d\n", + DOM0_CONFIG_MEMSIZE); + return -EINVAL; + } + + /* Setup the misc device */ + dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR; + dom0_dev.miscdev.name = "dom0_mm"; + dom0_dev.miscdev.fops = &data_fops; + + /* register misc char device */ + if (misc_register(&dom0_dev.miscdev) != 0) { + XEN_ERR("Misc device registration failed\n"); + return -EPERM; + } + + mutex_init(&dom0_dev.data_lock); + dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj); + + if (!dom0_kobj) { + XEN_ERR("dom0-mm object creation failed\n"); + misc_deregister(&dom0_dev.miscdev); + return -ENOMEM; + } + + if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) { + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + return -EPERM; + } + + if (dom0_memory_reserve(rsv_memsize) < 0) { + sysfs_remove_group(dom0_kobj, &dev_attr_grp); + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + return -ENOMEM; + } + + XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n"); + + return 0; +} + +static void __exit +dom0_exit(void) +{ + if (rsv_mm_info != NULL) + dom0_memory_free(rsv_memsize); + + sysfs_remove_group(dom0_kobj, &dev_attr_grp); + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + + XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n"); +} + +static int +dom0_open(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + + XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n"); + return 0; +} + +static int +dom0_release(struct inode *inode, struct file *file) +{ + int ret = 0; + struct dom0_mm_data *mm_data = file->private_data; + + if (mm_data == NULL) + return ret; + + mutex_lock(&dom0_dev.data_lock); + if (--mm_data->refcnt == 0) + ret = dom0_memory_release(mm_data); + mutex_unlock(&dom0_dev.data_lock); + + file->private_data = NULL; + XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n"); + return ret; +} + +static int +dom0_mmap(struct file *file, struct vm_area_struct *vm) +{ + int status = 0; + uint32_t idx = vm->vm_pgoff; + uint64_t pfn, size = vm->vm_end - vm->vm_start; + struct dom0_mm_data *mm_data = file->private_data; + + if(mm_data == NULL) + return -EINVAL; + + mutex_lock(&dom0_dev.data_lock); + if (idx >= mm_data->num_memseg) { + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + if (size > mm_data->seg_info[idx].size){ + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size); + + pfn = mm_data->seg_info[idx].pfn; + mutex_unlock(&dom0_dev.data_lock); + + status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED); + + return status; +} +static int +dom0_ioctl(struct file *file, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + int idx, ret; + char name[DOM0_NAME_MAX] = {0}; + struct memory_info meminfo; + struct dom0_mm_data *mm_data = file->private_data; + + XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param); + + /** + * Switch according to the ioctl called + */ + switch _IOC_NR(ioctl_num) { + case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG): + ret = copy_from_user(&meminfo, (void *)ioctl_param, + sizeof(struct memory_info)); + if (ret) + return -EFAULT; + + if (mm_data != NULL) { + XEN_ERR("Cannot create memory segment for the same" + " file descriptor\n"); + return -EINVAL; + } + + /* Allocate private data */ + mm_data = vmalloc(sizeof(struct dom0_mm_data)); + if (!mm_data) { + XEN_ERR("Unable to allocate device private data\n"); + return -ENOMEM; + } + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + + mutex_lock(&dom0_dev.data_lock); + /* check if we can allocate memory*/ + if (dom0_check_memory(&meminfo) < 0) { + mutex_unlock(&dom0_dev.data_lock); + vfree(mm_data); + return -EINVAL; + } + + /* allocate memory and created memory segments*/ + if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) { + XEN_ERR("create memory segment fail.\n"); + mutex_unlock(&dom0_dev.data_lock); + return -EIO; + } + + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + /* support multiple process in term of memory mapping*/ + case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG): + ret = copy_from_user(name, (void *)ioctl_param, + sizeof(char) * DOM0_NAME_MAX); + if (ret) + return -EFAULT; + + mutex_lock(&dom0_dev.data_lock); + idx = dom0_find_memdata(name); + if (idx < 0) { + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + mm_data = dom0_dev.mm_data[idx]; + mm_data->refcnt++; + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG): + ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg, + sizeof(int)); + if (ret) + return -EFAULT; + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO): + ret = copy_to_user((void *)ioctl_param, + &mm_data->seg_info[0], + sizeof(struct memseg_info) * + mm_data->num_memseg); + if (ret) + return -EFAULT; + break; + default: + XEN_PRINT("IOCTL default \n"); + break; + } + + return 0; +} + +module_init(dom0_init); +module_exit(dom0_exit); + +module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n"); diff --git a/lib/librte_ether/Makefile b/lib/librte_ether/Makefile new file mode 100644 index 00000000..e8102846 --- /dev/null +++ b/lib/librte_ether/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = libethdev.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_ether_version.map + +LIBABIVER := 3 + +SRCS-y += rte_ethdev.c + +# +# Export include files +# +SYMLINK-y-include += rte_ether.h +SYMLINK-y-include += rte_ethdev.h +SYMLINK-y-include += rte_eth_ctrl.h +SYMLINK-y-include += rte_dev_info.h + +# this lib depends upon: +DEPDIRS-y += lib/librte_eal lib/librte_mempool lib/librte_ring lib/librte_mbuf + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ether/rte_dev_info.h b/lib/librte_ether/rte_dev_info.h new file mode 100644 index 00000000..291bd4d7 --- /dev/null +++ b/lib/librte_ether/rte_dev_info.h @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEV_INFO_H_ +#define _RTE_DEV_INFO_H_ + +/* + * Placeholder for accessing device registers + */ +struct rte_dev_reg_info { + void *data; /**< Buffer for return registers */ + uint32_t offset; /**< Start register table location for access */ + uint32_t length; /**< Number of registers to fetch */ + uint32_t version; /**< Device version */ +}; + +/* + * Placeholder for accessing device eeprom + */ +struct rte_dev_eeprom_info { + void *data; /**< Buffer for return eeprom */ + uint32_t offset; /**< Start eeprom address for access*/ + uint32_t length; /**< Length of eeprom region to access */ + uint32_t magic; /**< Device-specific key, such as device-id */ +}; + +#endif /* _RTE_DEV_INFO_H_ */ diff --git a/lib/librte_ether/rte_eth_ctrl.h b/lib/librte_ether/rte_eth_ctrl.h new file mode 100644 index 00000000..b8c7be90 --- /dev/null +++ b/lib/librte_ether/rte_eth_ctrl.h @@ -0,0 +1,846 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETH_CTRL_H_ +#define _RTE_ETH_CTRL_H_ + +/** + * @file + * + * Ethernet device features and related data structures used + * by control APIs should be defined in this file. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * A packet can be identified by hardware as different flow types. Different + * NIC hardwares may support different flow types. + * Basically, the NIC hardware identifies the flow type as deep protocol as + * possible, and exclusively. For example, if a packet is identified as + * 'RTE_ETH_FLOW_NONFRAG_IPV4_TCP', it will not be any of other flow types, + * though it is an actual IPV4 packet. + * Note that the flow types are used to define RSS offload types in + * rte_ethdev.h. + */ +#define RTE_ETH_FLOW_UNKNOWN 0 +#define RTE_ETH_FLOW_RAW 1 +#define RTE_ETH_FLOW_IPV4 2 +#define RTE_ETH_FLOW_FRAG_IPV4 3 +#define RTE_ETH_FLOW_NONFRAG_IPV4_TCP 4 +#define RTE_ETH_FLOW_NONFRAG_IPV4_UDP 5 +#define RTE_ETH_FLOW_NONFRAG_IPV4_SCTP 6 +#define RTE_ETH_FLOW_NONFRAG_IPV4_OTHER 7 +#define RTE_ETH_FLOW_IPV6 8 +#define RTE_ETH_FLOW_FRAG_IPV6 9 +#define RTE_ETH_FLOW_NONFRAG_IPV6_TCP 10 +#define RTE_ETH_FLOW_NONFRAG_IPV6_UDP 11 +#define RTE_ETH_FLOW_NONFRAG_IPV6_SCTP 12 +#define RTE_ETH_FLOW_NONFRAG_IPV6_OTHER 13 +#define RTE_ETH_FLOW_L2_PAYLOAD 14 +#define RTE_ETH_FLOW_IPV6_EX 15 +#define RTE_ETH_FLOW_IPV6_TCP_EX 16 +#define RTE_ETH_FLOW_IPV6_UDP_EX 17 +#define RTE_ETH_FLOW_MAX 18 + +/** + * Feature filter types + */ +enum rte_filter_type { + RTE_ETH_FILTER_NONE = 0, + RTE_ETH_FILTER_MACVLAN, + RTE_ETH_FILTER_ETHERTYPE, + RTE_ETH_FILTER_FLEXIBLE, + RTE_ETH_FILTER_SYN, + RTE_ETH_FILTER_NTUPLE, + RTE_ETH_FILTER_TUNNEL, + RTE_ETH_FILTER_FDIR, + RTE_ETH_FILTER_HASH, + RTE_ETH_FILTER_L2_TUNNEL, + RTE_ETH_FILTER_MAX +}; + +/** + * Generic operations on filters + */ +enum rte_filter_op { + /** used to check whether the type filter is supported */ + RTE_ETH_FILTER_NOP = 0, + RTE_ETH_FILTER_ADD, /**< add filter entry */ + RTE_ETH_FILTER_UPDATE, /**< update filter entry */ + RTE_ETH_FILTER_DELETE, /**< delete filter entry */ + RTE_ETH_FILTER_FLUSH, /**< flush all entries */ + RTE_ETH_FILTER_GET, /**< get filter entry */ + RTE_ETH_FILTER_SET, /**< configurations */ + RTE_ETH_FILTER_INFO, /**< retrieve information */ + RTE_ETH_FILTER_STATS, /**< retrieve statistics */ + RTE_ETH_FILTER_OP_MAX +}; + +/** + * MAC filter type + */ +enum rte_mac_filter_type { + RTE_MAC_PERFECT_MATCH = 1, /**< exact match of MAC addr. */ + RTE_MACVLAN_PERFECT_MATCH, /**< exact match of MAC addr and VLAN ID. */ + RTE_MAC_HASH_MATCH, /**< hash match of MAC addr. */ + /** hash match of MAC addr and exact match of VLAN ID. */ + RTE_MACVLAN_HASH_MATCH, +}; + +/** + * MAC filter info + */ +struct rte_eth_mac_filter { + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ + enum rte_mac_filter_type filter_type; /**< MAC filter type */ + struct ether_addr mac_addr; +}; + +/** + * Define all structures for Ethertype Filter type. + */ + +#define RTE_ETHTYPE_FLAGS_MAC 0x0001 /**< If set, compare mac */ +#define RTE_ETHTYPE_FLAGS_DROP 0x0002 /**< If set, drop packet when match */ + +/** + * A structure used to define the ethertype filter entry + * to support RTE_ETH_FILTER_ETHERTYPE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ethertype_filter { + struct ether_addr mac_addr; /**< Mac address to match. */ + uint16_t ether_type; /**< Ether type to match */ + uint16_t flags; /**< Flags from RTE_ETHTYPE_FLAGS_* */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +#define RTE_FLEX_FILTER_MAXLEN 128 /**< bytes to use in flex filter. */ +#define RTE_FLEX_FILTER_MASK_SIZE \ + (RTE_ALIGN(RTE_FLEX_FILTER_MAXLEN, CHAR_BIT) / CHAR_BIT) + /**< mask bytes in flex filter. */ + +/** + * A structure used to define the flex filter entry + * to support RTE_ETH_FILTER_FLEXIBLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_flex_filter { + uint16_t len; + uint8_t bytes[RTE_FLEX_FILTER_MAXLEN]; /**< flex bytes in big endian.*/ + uint8_t mask[RTE_FLEX_FILTER_MASK_SIZE]; /**< if mask bit is 1b, do + not compare corresponding byte. */ + uint8_t priority; + uint16_t queue; /**< Queue assigned to when match. */ +}; + +/** + * A structure used to define the TCP syn filter entry + * to support RTE_ETH_FILTER_SYN with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_syn_filter { + uint8_t hig_pri; /**< 1 - higher priority than other filters, + 0 - lower priority. */ + uint16_t queue; /**< Queue assigned to when match */ +}; + +/** + * Define all structures for ntuple Filter type. + */ + +#define RTE_NTUPLE_FLAGS_DST_IP 0x0001 /**< If set, dst_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_IP 0x0002 /**< If set, src_ip is part of ntuple */ +#define RTE_NTUPLE_FLAGS_DST_PORT 0x0004 /**< If set, dst_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_SRC_PORT 0x0008 /**< If set, src_port is part of ntuple */ +#define RTE_NTUPLE_FLAGS_PROTO 0x0010 /**< If set, protocol is part of ntuple */ +#define RTE_NTUPLE_FLAGS_TCP_FLAG 0x0020 /**< If set, tcp flag is involved */ + +#define RTE_5TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_IP | \ + RTE_NTUPLE_FLAGS_SRC_IP | \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_SRC_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define RTE_2TUPLE_FLAGS ( \ + RTE_NTUPLE_FLAGS_DST_PORT | \ + RTE_NTUPLE_FLAGS_PROTO) + +#define TCP_URG_FLAG 0x20 +#define TCP_ACK_FLAG 0x10 +#define TCP_PSH_FLAG 0x08 +#define TCP_RST_FLAG 0x04 +#define TCP_SYN_FLAG 0x02 +#define TCP_FIN_FLAG 0x01 +#define TCP_FLAG_ALL 0x3F + +/** + * A structure used to define the ntuple filter entry + * to support RTE_ETH_FILTER_NTUPLE with RTE_ETH_FILTER_ADD, + * RTE_ETH_FILTER_DELETE and RTE_ETH_FILTER_GET operations. + */ +struct rte_eth_ntuple_filter { + uint16_t flags; /**< Flags from RTE_NTUPLE_FLAGS_* */ + uint32_t dst_ip; /**< Destination IP address in big endian. */ + uint32_t dst_ip_mask; /**< Mask of destination IP address. */ + uint32_t src_ip; /**< Source IP address in big endian. */ + uint32_t src_ip_mask; /**< Mask of destination IP address. */ + uint16_t dst_port; /**< Destination port in big endian. */ + uint16_t dst_port_mask; /**< Mask of destination port. */ + uint16_t src_port; /**< Source Port in big endian. */ + uint16_t src_port_mask; /**< Mask of source port. */ + uint8_t proto; /**< L4 protocol. */ + uint8_t proto_mask; /**< Mask of L4 protocol. */ + /** tcp_flags only meaningful when the proto is TCP. + The packet matched above ntuple fields and contain + any set bit in tcp_flags will hit this filter. */ + uint8_t tcp_flags; + uint16_t priority; /**< seven levels (001b-111b), 111b is highest, + used when more than one filter matches. */ + uint16_t queue; /**< Queue assigned to when match*/ +}; + +/** + * Tunneled type. + */ +enum rte_eth_tunnel_type { + RTE_TUNNEL_TYPE_NONE = 0, + RTE_TUNNEL_TYPE_VXLAN, + RTE_TUNNEL_TYPE_GENEVE, + RTE_TUNNEL_TYPE_TEREDO, + RTE_TUNNEL_TYPE_NVGRE, + RTE_TUNNEL_TYPE_IP_IN_GRE, + RTE_L2_TUNNEL_TYPE_E_TAG, + RTE_TUNNEL_TYPE_MAX, +}; + +/** + * filter type of tunneling packet + */ +#define ETH_TUNNEL_FILTER_OMAC 0x01 /**< filter by outer MAC addr */ +#define ETH_TUNNEL_FILTER_OIP 0x02 /**< filter by outer IP Addr */ +#define ETH_TUNNEL_FILTER_TENID 0x04 /**< filter by tenant ID */ +#define ETH_TUNNEL_FILTER_IMAC 0x08 /**< filter by inner MAC addr */ +#define ETH_TUNNEL_FILTER_IVLAN 0x10 /**< filter by inner VLAN ID */ +#define ETH_TUNNEL_FILTER_IIP 0x20 /**< filter by inner IP addr */ + +#define RTE_TUNNEL_FILTER_IMAC_IVLAN (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN) +#define RTE_TUNNEL_FILTER_IMAC_IVLAN_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_IVLAN | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_IMAC_TENID (ETH_TUNNEL_FILTER_IMAC | \ + ETH_TUNNEL_FILTER_TENID) +#define RTE_TUNNEL_FILTER_OMAC_TENID_IMAC (ETH_TUNNEL_FILTER_OMAC | \ + ETH_TUNNEL_FILTER_TENID | \ + ETH_TUNNEL_FILTER_IMAC) + +/** + * Select IPv4 or IPv6 for tunnel filters. + */ +enum rte_tunnel_iptype { + RTE_TUNNEL_IPTYPE_IPV4 = 0, /**< IPv4. */ + RTE_TUNNEL_IPTYPE_IPV6, /**< IPv6. */ +}; + +/** + * Tunneling Packet filter configuration. + */ +struct rte_eth_tunnel_filter_conf { + struct ether_addr outer_mac; /**< Outer MAC address to match. */ + struct ether_addr inner_mac; /**< Inner MAC address to match. */ + uint16_t inner_vlan; /**< Inner VLAN to match. */ + enum rte_tunnel_iptype ip_type; /**< IP address type. */ + /** Outer destination IP address to match if ETH_TUNNEL_FILTER_OIP + is set in filter_type, or inner destination IP address to match + if ETH_TUNNEL_FILTER_IIP is set in filter_type . */ + union { + uint32_t ipv4_addr; /**< IPv4 address in big endian. */ + uint32_t ipv6_addr[4]; /**< IPv6 address in big endian. */ + } ip_addr; + /** Flags from ETH_TUNNEL_FILTER_XX - see above. */ + uint16_t filter_type; + enum rte_eth_tunnel_type tunnel_type; /**< Tunnel Type. */ + uint32_t tenant_id; /**< Tenant ID to match. VNI, GRE key... */ + uint16_t queue_id; /**< Queue assigned to if match. */ +}; + +/** + * Global eth device configuration type. + */ +enum rte_eth_global_cfg_type { + RTE_ETH_GLOBAL_CFG_TYPE_UNKNOWN = 0, + RTE_ETH_GLOBAL_CFG_TYPE_GRE_KEY_LEN, + RTE_ETH_GLOBAL_CFG_TYPE_MAX, +}; + +/** + * Global eth device configuration. + */ +struct rte_eth_global_cfg { + enum rte_eth_global_cfg_type cfg_type; /**< Global config type. */ + union { + uint8_t gre_key_len; /**< Valid GRE key length in byte. */ + uint64_t reserved; /**< Reserve space for future use. */ + } cfg; +}; + +#define RTE_ETH_FDIR_MAX_FLEXLEN 16 /**< Max length of flexbytes. */ +#define RTE_ETH_INSET_SIZE_MAX 128 /**< Max length of input set. */ + +/** + * Input set fields for Flow Director and Hash filters + */ +enum rte_eth_input_set_field { + RTE_ETH_INPUT_SET_UNKNOWN = 0, + + /* L2 */ + RTE_ETH_INPUT_SET_L2_SRC_MAC = 1, + RTE_ETH_INPUT_SET_L2_DST_MAC, + RTE_ETH_INPUT_SET_L2_OUTER_VLAN, + RTE_ETH_INPUT_SET_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_L2_ETHERTYPE, + + /* L3 */ + RTE_ETH_INPUT_SET_L3_SRC_IP4 = 129, + RTE_ETH_INPUT_SET_L3_DST_IP4, + RTE_ETH_INPUT_SET_L3_SRC_IP6, + RTE_ETH_INPUT_SET_L3_DST_IP6, + RTE_ETH_INPUT_SET_L3_IP4_TOS, + RTE_ETH_INPUT_SET_L3_IP4_PROTO, + RTE_ETH_INPUT_SET_L3_IP6_TC, + RTE_ETH_INPUT_SET_L3_IP6_NEXT_HEADER, + RTE_ETH_INPUT_SET_L3_IP4_TTL, + RTE_ETH_INPUT_SET_L3_IP6_HOP_LIMITS, + + /* L4 */ + RTE_ETH_INPUT_SET_L4_UDP_SRC_PORT = 257, + RTE_ETH_INPUT_SET_L4_UDP_DST_PORT, + RTE_ETH_INPUT_SET_L4_TCP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_TCP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_SRC_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_DST_PORT, + RTE_ETH_INPUT_SET_L4_SCTP_VERIFICATION_TAG, + + /* Tunnel */ + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_DST_MAC = 385, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_SRC_MAC, + RTE_ETH_INPUT_SET_TUNNEL_L2_INNER_VLAN, + RTE_ETH_INPUT_SET_TUNNEL_L4_UDP_KEY, + RTE_ETH_INPUT_SET_TUNNEL_GRE_KEY, + + /* Flexible Payload */ + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_1ST_WORD = 641, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_2ND_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_3RD_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_4TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_5TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_6TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_7TH_WORD, + RTE_ETH_INPUT_SET_FLEX_PAYLOAD_8TH_WORD, + + RTE_ETH_INPUT_SET_DEFAULT = 65533, + RTE_ETH_INPUT_SET_NONE = 65534, + RTE_ETH_INPUT_SET_MAX = 65535, +}; + +/** + * Filters input set operations + */ +enum rte_filter_input_set_op { + RTE_ETH_INPUT_SET_OP_UNKNOWN, + RTE_ETH_INPUT_SET_SELECT, /**< select input set */ + RTE_ETH_INPUT_SET_ADD, /**< add input set entry */ + RTE_ETH_INPUT_SET_OP_MAX +}; + + +/** + * A structure used to define the input set configuration for + * flow director and hash filters + */ +struct rte_eth_input_set_conf { + uint16_t flow_type; + uint16_t inset_size; + enum rte_eth_input_set_field field[RTE_ETH_INSET_SIZE_MAX]; + enum rte_filter_input_set_op op; +}; + +/** + * A structure used to define the input for L2 flow + */ +struct rte_eth_l2_flow { + uint16_t ether_type; /**< Ether type in big endian */ +}; + +/** + * A structure used to define the input for IPV4 flow + */ +struct rte_eth_ipv4_flow { + uint32_t src_ip; /**< IPv4 source address in big endian. */ + uint32_t dst_ip; /**< IPv4 destination address in big endian. */ + uint8_t tos; /**< Type of service to match. */ + uint8_t ttl; /**< Time to live to match. */ + uint8_t proto; /**< Protocol, next header in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 UDP flow + */ +struct rte_eth_udpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 TCP flow + */ +struct rte_eth_tcpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< TCP source port in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV4 SCTP flow + */ +struct rte_eth_sctpv4_flow { + struct rte_eth_ipv4_flow ip; /**< IPv4 fields to match. */ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian */ +}; + +/** + * A structure used to define the input for IPV6 flow + */ +struct rte_eth_ipv6_flow { + uint32_t src_ip[4]; /**< IPv6 source address in big endian. */ + uint32_t dst_ip[4]; /**< IPv6 destination address in big endian. */ + uint8_t tc; /**< Traffic class to match. */ + uint8_t proto; /**< Protocol, next header to match. */ + uint8_t hop_limits; /**< Hop limits to match. */ +}; + +/** + * A structure used to define the input for IPV6 UDP flow + */ +struct rte_eth_udpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< UDP source port in big endian. */ + uint16_t dst_port; /**< UDP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 TCP flow + */ +struct rte_eth_tcpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< TCP source port to in big endian. */ + uint16_t dst_port; /**< TCP destination port in big endian. */ +}; + +/** + * A structure used to define the input for IPV6 SCTP flow + */ +struct rte_eth_sctpv6_flow { + struct rte_eth_ipv6_flow ip; /**< IPv6 fields to match. */ + uint16_t src_port; /**< SCTP source port in big endian. */ + uint16_t dst_port; /**< SCTP destination port in big endian. */ + uint32_t verify_tag; /**< Verify tag in big endian. */ +}; + +/** + * A structure used to define the input for MAC VLAN flow + */ +struct rte_eth_mac_vlan_flow { + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * Tunnel type for flow director. + */ +enum rte_eth_fdir_tunnel_type { + RTE_FDIR_TUNNEL_TYPE_UNKNOWN = 0, + RTE_FDIR_TUNNEL_TYPE_NVGRE, + RTE_FDIR_TUNNEL_TYPE_VXLAN, +}; + +/** + * A structure used to define the input for tunnel flow, now it's VxLAN or + * NVGRE + */ +struct rte_eth_tunnel_flow { + enum rte_eth_fdir_tunnel_type tunnel_type; /**< Tunnel type to match. */ + /** Tunnel ID to match. TNI, VNI... in big endian. */ + uint32_t tunnel_id; + struct ether_addr mac_addr; /**< Mac address to match. */ +}; + +/** + * An union contains the inputs for all types of flow + * Items in flows need to be in big endian + */ +union rte_eth_fdir_flow { + struct rte_eth_l2_flow l2_flow; + struct rte_eth_udpv4_flow udp4_flow; + struct rte_eth_tcpv4_flow tcp4_flow; + struct rte_eth_sctpv4_flow sctp4_flow; + struct rte_eth_ipv4_flow ip4_flow; + struct rte_eth_udpv6_flow udp6_flow; + struct rte_eth_tcpv6_flow tcp6_flow; + struct rte_eth_sctpv6_flow sctp6_flow; + struct rte_eth_ipv6_flow ipv6_flow; + struct rte_eth_mac_vlan_flow mac_vlan_flow; + struct rte_eth_tunnel_flow tunnel_flow; +}; + +/** + * A structure used to contain extend input of flow + */ +struct rte_eth_fdir_flow_ext { + uint16_t vlan_tci; + uint8_t flexbytes[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< It is filled by the flexible payload to match. */ + uint8_t is_vf; /**< 1 for VF, 0 for port dev */ + uint16_t dst_id; /**< VF ID, available when is_vf is 1*/ +}; + +/** + * A structure used to define the input for a flow director filter entry + */ +struct rte_eth_fdir_input { + uint16_t flow_type; + union rte_eth_fdir_flow flow; + /**< Flow fields to match, dependent on flow_type */ + struct rte_eth_fdir_flow_ext flow_ext; + /**< Additional fields to match */ +}; + +/** + * Behavior will be taken if FDIR match + */ +enum rte_eth_fdir_behavior { + RTE_ETH_FDIR_ACCEPT = 0, + RTE_ETH_FDIR_REJECT, + RTE_ETH_FDIR_PASSTHRU, +}; + +/** + * Flow director report status + * It defines what will be reported if FDIR entry is matched. + */ +enum rte_eth_fdir_status { + RTE_ETH_FDIR_NO_REPORT_STATUS = 0, /**< Report nothing. */ + RTE_ETH_FDIR_REPORT_ID, /**< Only report FD ID. */ + RTE_ETH_FDIR_REPORT_ID_FLEX_4, /**< Report FD ID and 4 flex bytes. */ + RTE_ETH_FDIR_REPORT_FLEX_8, /**< Report 8 flex bytes. */ +}; + +/** + * A structure used to define an action when match FDIR packet filter. + */ +struct rte_eth_fdir_action { + uint16_t rx_queue; /**< Queue assigned to if FDIR match. */ + enum rte_eth_fdir_behavior behavior; /**< Behavior will be taken */ + enum rte_eth_fdir_status report_status; /**< Status report option */ + uint8_t flex_off; + /**< If report_status is RTE_ETH_FDIR_REPORT_ID_FLEX_4 or + RTE_ETH_FDIR_REPORT_FLEX_8, flex_off specifies where the reported + flex bytes start from in flexible payload. */ +}; + +/** + * A structure used to define the flow director filter entry by filter_ctrl API + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_ADD and + * RTE_ETH_FILTER_DELETE operations. + */ +struct rte_eth_fdir_filter { + uint32_t soft_id; + /**< ID, an unique value is required when deal with FDIR entry */ + struct rte_eth_fdir_input input; /**< Input set */ + struct rte_eth_fdir_action action; /**< Action taken when match */ +}; + +/** + * A structure used to configure FDIR masks that are used by the device + * to match the various fields of RX packet headers. + */ +struct rte_eth_fdir_masks { + uint16_t vlan_tci_mask; /**< Bit mask for vlan_tci in big endian */ + /** Bit mask for ipv4 flow in big endian. */ + struct rte_eth_ipv4_flow ipv4_mask; + /** Bit maks for ipv6 flow in big endian. */ + struct rte_eth_ipv6_flow ipv6_mask; + /** Bit mask for L4 source port in big endian. */ + uint16_t src_port_mask; + /** Bit mask for L4 destination port in big endian. */ + uint16_t dst_port_mask; + /** 6 bit mask for proper 6 bytes of Mac address, bit 0 matches the + first byte on the wire */ + uint8_t mac_addr_byte_mask; + /** Bit mask for tunnel ID in big endian. */ + uint32_t tunnel_id_mask; + uint8_t tunnel_type_mask; /**< 1 - Match tunnel type, + 0 - Ignore tunnel type. */ +}; + +/** + * Payload type + */ +enum rte_eth_payload_type { + RTE_ETH_PAYLOAD_UNKNOWN = 0, + RTE_ETH_RAW_PAYLOAD, + RTE_ETH_L2_PAYLOAD, + RTE_ETH_L3_PAYLOAD, + RTE_ETH_L4_PAYLOAD, + RTE_ETH_PAYLOAD_MAX = 8, +}; + +/** + * A structure used to select bytes extracted from the protocol layers to + * flexible payload for filter + */ +struct rte_eth_flex_payload_cfg { + enum rte_eth_payload_type type; /**< Payload type */ + uint16_t src_offset[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Offset in bytes from the beginning of packet's payload + src_offset[i] indicates the flexbyte i's offset in original + packet payload. This value should be less than + flex_payload_limit in struct rte_eth_fdir_info.*/ +}; + +/** + * A structure used to define FDIR masks for flexible payload + * for each flow type + */ +struct rte_eth_fdir_flex_mask { + uint16_t flow_type; + uint8_t mask[RTE_ETH_FDIR_MAX_FLEXLEN]; + /**< Mask for the whole flexible payload */ +}; + +/** + * A structure used to define all flexible payload related setting + * include flex payload and flex mask + */ +struct rte_eth_fdir_flex_conf { + uint16_t nb_payloads; /**< The number of following payload cfg */ + uint16_t nb_flexmasks; /**< The number of following mask */ + struct rte_eth_flex_payload_cfg flex_set[RTE_ETH_PAYLOAD_MAX]; + /**< Flex payload configuration for each payload type */ + struct rte_eth_fdir_flex_mask flex_mask[RTE_ETH_FLOW_MAX]; + /**< Flex mask configuration for each flow type */ +}; + +/** + * Flow Director setting modes: none, signature or perfect. + */ +enum rte_fdir_mode { + RTE_FDIR_MODE_NONE = 0, /**< Disable FDIR support. */ + RTE_FDIR_MODE_SIGNATURE, /**< Enable FDIR signature filter mode. */ + RTE_FDIR_MODE_PERFECT, /**< Enable FDIR perfect filter mode. */ + RTE_FDIR_MODE_PERFECT_MAC_VLAN, /**< Enable FDIR filter mode - MAC VLAN. */ + RTE_FDIR_MODE_PERFECT_TUNNEL, /**< Enable FDIR filter mode - tunnel. */ +}; + +#define UINT32_BIT (CHAR_BIT * sizeof(uint32_t)) +#define RTE_FLOW_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT) + +/** + * A structure used to get the information of flow director filter. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_INFO operation. + * It includes the mode, flexible payload configuration information, + * capabilities and supported flow types, flexible payload characters. + * It can be gotten to help taking specific configurations per device. + */ +struct rte_eth_fdir_info { + enum rte_fdir_mode mode; /**< Flow director mode */ + struct rte_eth_fdir_masks mask; + /** Flex payload configuration information */ + struct rte_eth_fdir_flex_conf flex_conf; + uint32_t guarant_spc; /**< Guaranteed spaces.*/ + uint32_t best_spc; /**< Best effort spaces.*/ + /** Bit mask for every supported flow type. */ + uint32_t flow_types_mask[RTE_FLOW_MASK_ARRAY_SIZE]; + uint32_t max_flexpayload; /**< Total flex payload in bytes. */ + /** Flexible payload unit in bytes. Size and alignments of all flex + payload segments should be multiplies of this value. */ + uint32_t flex_payload_unit; + /** Max number of flexible payload continuous segments. + Each segment should be a multiple of flex_payload_unit.*/ + uint32_t max_flex_payload_segment_num; + /** Maximum src_offset in bytes allowed. It indicates that + src_offset[i] in struct rte_eth_flex_payload_cfg should be less + than this value. */ + uint16_t flex_payload_limit; + /** Flex bitmask unit in bytes. Size of flex bitmasks should be a + multiply of this value. */ + uint32_t flex_bitmask_unit; + /** Max supported size of flex bitmasks in flex_bitmask_unit */ + uint32_t max_flex_bitmask_num; +}; + +/** + * A structure used to define the statistics of flow director. + * It supports RTE_ETH_FILTER_FDIR with RTE_ETH_FILTER_STATS operation. + */ +struct rte_eth_fdir_stats { + uint32_t collision; /**< Number of filters with collision. */ + uint32_t free; /**< Number of free filters. */ + uint32_t maxhash; + /**< The lookup hash value of the added filter that updated the value + of the MAXLEN field */ + uint32_t maxlen; /**< Longest linked list of filters. */ + uint64_t add; /**< Number of added filters. */ + uint64_t remove; /**< Number of removed filters. */ + uint64_t f_add; /**< Number of failed added filters. */ + uint64_t f_remove; /**< Number of failed removed filters. */ + uint32_t guarant_cnt; /**< Number of filters in guaranteed spaces. */ + uint32_t best_cnt; /**< Number of filters in best effort spaces. */ +}; + +/** + * Flow Director filter information types. + */ +enum rte_eth_fdir_filter_info_type { + RTE_ETH_FDIR_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Flow Director filter input set configuration */ + RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT, + RTE_ETH_FDIR_FILTER_INFO_TYPE_MAX, +}; + +/** + * A structure used to set FDIR filter information, to support filter type + * of 'RTE_ETH_FILTER_FDIR' RTE_ETH_FDIR_FILTER_INPUT_SET_SELECT operation. + */ +struct rte_eth_fdir_filter_info { + enum rte_eth_fdir_filter_info_type info_type; /**< Information type */ + /** Details of fdir filter information */ + union { + /** Flow Director input set configuration per port */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * Hash filter information types. + * - RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT is for getting/setting the + * information/configuration of 'symmetric hash enable' per port. + * - RTE_ETH_HASH_FILTER_GLOBAL_CONFIG is for getting/setting the global + * configurations of hash filters. Those global configurations are valid + * for all ports of the same NIC. + * - RTE_ETH_HASH_FILTER_INPUT_SET_SELECT is for setting the global + * hash input set fields + */ +enum rte_eth_hash_filter_info_type { + RTE_ETH_HASH_FILTER_INFO_TYPE_UNKNOWN = 0, + /** Symmetric hash enable per port */ + RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT, + /** Configure globally for hash filter */ + RTE_ETH_HASH_FILTER_GLOBAL_CONFIG, + /** Global Hash filter input set configuration */ + RTE_ETH_HASH_FILTER_INPUT_SET_SELECT, + RTE_ETH_HASH_FILTER_INFO_TYPE_MAX, +}; + +/** + * Hash function types. + */ +enum rte_eth_hash_function { + RTE_ETH_HASH_FUNCTION_DEFAULT = 0, + RTE_ETH_HASH_FUNCTION_TOEPLITZ, /**< Toeplitz */ + RTE_ETH_HASH_FUNCTION_SIMPLE_XOR, /**< Simple XOR */ + RTE_ETH_HASH_FUNCTION_MAX, +}; + +#define RTE_SYM_HASH_MASK_ARRAY_SIZE \ + (RTE_ALIGN(RTE_ETH_FLOW_MAX, UINT32_BIT)/UINT32_BIT) +/** + * A structure used to set or get global hash function configurations which + * include symmetric hash enable per flow type and hash function type. + * Each bit in sym_hash_enable_mask[] indicates if the symmetric hash of the + * corresponding flow type is enabled or not. + * Each bit in valid_bit_mask[] indicates if the corresponding bit in + * sym_hash_enable_mask[] is valid or not. For the configurations gotten, it + * also means if the flow type is supported by hardware or not. + */ +struct rte_eth_hash_global_conf { + enum rte_eth_hash_function hash_func; /**< Hash function type */ + /** Bit mask for symmetric hash enable per flow type */ + uint32_t sym_hash_enable_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; + /** Bit mask indicates if the corresponding bit is valid */ + uint32_t valid_bit_mask[RTE_SYM_HASH_MASK_ARRAY_SIZE]; +}; + +/** + * A structure used to set or get hash filter information, to support filter + * type of 'RTE_ETH_FILTER_HASH' and its operations. + */ +struct rte_eth_hash_filter_info { + enum rte_eth_hash_filter_info_type info_type; /**< Information type */ + /** Details of hash filter information */ + union { + /** For RTE_ETH_HASH_FILTER_SYM_HASH_ENA_PER_PORT */ + uint8_t enable; + /** Global configurations of hash filter */ + struct rte_eth_hash_global_conf global_conf; + /** Global configurations of hash filter input set */ + struct rte_eth_input_set_conf input_set_conf; + } info; +}; + +/** + * l2 tunnel configuration. + */ +struct rte_eth_l2_tunnel_conf { + enum rte_eth_tunnel_type l2_tunnel_type; + uint16_t ether_type; /* ether type in l2 header */ + uint32_t tunnel_id; /* port tag id for e-tag */ + uint16_t vf_id; /* VF id for tag insertion */ + uint32_t pool; /* destination pool for tag based forwarding */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETH_CTRL_H_ */ diff --git a/lib/librte_ether/rte_ethdev.c b/lib/librte_ether/rte_ethdev.c new file mode 100644 index 00000000..a31018e8 --- /dev/null +++ b/lib/librte_ether/rte_ethdev.c @@ -0,0 +1,3371 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ether.h" +#include "rte_ethdev.h" + +static const char *MZ_RTE_ETH_DEV_DATA = "rte_eth_dev_data"; +struct rte_eth_dev rte_eth_devices[RTE_MAX_ETHPORTS]; +static struct rte_eth_dev_data *rte_eth_dev_data; +static uint8_t nb_ports; + +/* spinlock for eth device callbacks */ +static rte_spinlock_t rte_eth_dev_cb_lock = RTE_SPINLOCK_INITIALIZER; + +/* store statistics names and its offset in stats structure */ +struct rte_eth_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned offset; +}; + +static const struct rte_eth_xstats_name_off rte_stats_strings[] = { + {"rx_good_packets", offsetof(struct rte_eth_stats, ipackets)}, + {"tx_good_packets", offsetof(struct rte_eth_stats, opackets)}, + {"rx_good_bytes", offsetof(struct rte_eth_stats, ibytes)}, + {"tx_good_bytes", offsetof(struct rte_eth_stats, obytes)}, + {"rx_errors", offsetof(struct rte_eth_stats, ierrors)}, + {"tx_errors", offsetof(struct rte_eth_stats, oerrors)}, + {"rx_mbuf_allocation_errors", offsetof(struct rte_eth_stats, + rx_nombuf)}, +}; + +#define RTE_NB_STATS (sizeof(rte_stats_strings) / sizeof(rte_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_rxq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_ipackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_ibytes)}, + {"errors", offsetof(struct rte_eth_stats, q_errors)}, +}; + +#define RTE_NB_RXQ_STATS (sizeof(rte_rxq_stats_strings) / \ + sizeof(rte_rxq_stats_strings[0])) + +static const struct rte_eth_xstats_name_off rte_txq_stats_strings[] = { + {"packets", offsetof(struct rte_eth_stats, q_opackets)}, + {"bytes", offsetof(struct rte_eth_stats, q_obytes)}, +}; +#define RTE_NB_TXQ_STATS (sizeof(rte_txq_stats_strings) / \ + sizeof(rte_txq_stats_strings[0])) + + +/** + * The user application callback description. + * + * It contains callback address to be registered by user application, + * the pointer to the parameters for callback, and the event type. + */ +struct rte_eth_dev_callback { + TAILQ_ENTRY(rte_eth_dev_callback) next; /**< Callbacks list */ + rte_eth_dev_cb_fn cb_fn; /**< Callback address */ + void *cb_arg; /**< Parameter for callback */ + enum rte_eth_event_type event; /**< Interrupt event type */ + uint32_t active; /**< Callback is executing */ +}; + +enum { + STAT_QMAP_TX = 0, + STAT_QMAP_RX +}; + +enum { + DEV_DETACHED = 0, + DEV_ATTACHED +}; + +static void +rte_eth_dev_data_alloc(void) +{ + const unsigned flags = 0; + const struct rte_memzone *mz; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + mz = rte_memzone_reserve(MZ_RTE_ETH_DEV_DATA, + RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data), + rte_socket_id(), flags); + } else + mz = rte_memzone_lookup(MZ_RTE_ETH_DEV_DATA); + if (mz == NULL) + rte_panic("Cannot allocate memzone for ethernet port data\n"); + + rte_eth_dev_data = mz->addr; + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + memset(rte_eth_dev_data, 0, + RTE_MAX_ETHPORTS * sizeof(*rte_eth_dev_data)); +} + +struct rte_eth_dev * +rte_eth_dev_allocated(const char *name) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if ((rte_eth_devices[i].attached == DEV_ATTACHED) && + strcmp(rte_eth_devices[i].data->name, name) == 0) + return &rte_eth_devices[i]; + } + return NULL; +} + +static uint8_t +rte_eth_dev_find_free_port(void) +{ + unsigned i; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + if (rte_eth_devices[i].attached == DEV_DETACHED) + return i; + } + return RTE_MAX_ETHPORTS; +} + +struct rte_eth_dev * +rte_eth_dev_allocate(const char *name, enum rte_eth_dev_type type) +{ + uint8_t port_id; + struct rte_eth_dev *eth_dev; + + port_id = rte_eth_dev_find_free_port(); + if (port_id == RTE_MAX_ETHPORTS) { + RTE_PMD_DEBUG_TRACE("Reached maximum number of Ethernet ports\n"); + return NULL; + } + + if (rte_eth_dev_data == NULL) + rte_eth_dev_data_alloc(); + + if (rte_eth_dev_allocated(name) != NULL) { + RTE_PMD_DEBUG_TRACE("Ethernet Device with name %s already allocated!\n", + name); + return NULL; + } + + eth_dev = &rte_eth_devices[port_id]; + eth_dev->data = &rte_eth_dev_data[port_id]; + snprintf(eth_dev->data->name, sizeof(eth_dev->data->name), "%s", name); + eth_dev->data->port_id = port_id; + eth_dev->attached = DEV_ATTACHED; + eth_dev->dev_type = type; + nb_ports++; + return eth_dev; +} + +static int +rte_eth_dev_create_unique_device_name(char *name, size_t size, + struct rte_pci_device *pci_dev) +{ + int ret; + + ret = snprintf(name, size, "%d:%d.%d", + pci_dev->addr.bus, pci_dev->addr.devid, + pci_dev->addr.function); + if (ret < 0) + return ret; + return 0; +} + +int +rte_eth_dev_release_port(struct rte_eth_dev *eth_dev) +{ + if (eth_dev == NULL) + return -EINVAL; + + eth_dev->attached = DEV_DETACHED; + nb_ports--; + return 0; +} + +static int +rte_eth_dev_init(struct rte_pci_driver *pci_drv, + struct rte_pci_device *pci_dev) +{ + struct eth_driver *eth_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + + int diag; + + eth_drv = (struct eth_driver *)pci_drv; + + /* Create unique Ethernet device name using PCI address */ + rte_eth_dev_create_unique_device_name(ethdev_name, + sizeof(ethdev_name), pci_dev); + + eth_dev = rte_eth_dev_allocate(ethdev_name, RTE_ETH_DEV_PCI); + if (eth_dev == NULL) + return -ENOMEM; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev->data->dev_private = rte_zmalloc("ethdev private structure", + eth_drv->dev_private_size, + RTE_CACHE_LINE_SIZE); + if (eth_dev->data->dev_private == NULL) + rte_panic("Cannot allocate memzone for private port data\n"); + } + eth_dev->pci_dev = pci_dev; + eth_dev->driver = eth_drv; + eth_dev->data->rx_mbuf_alloc_failed = 0; + + /* init user callbacks */ + TAILQ_INIT(&(eth_dev->link_intr_cbs)); + + /* + * Set the default MTU. + */ + eth_dev->data->mtu = ETHER_MTU; + + /* Invoke PMD device initialization function */ + diag = (*eth_drv->eth_dev_init)(eth_dev); + if (diag == 0) + return 0; + + RTE_PMD_DEBUG_TRACE("driver %s: eth_dev_init(vendor_id=0x%u device_id=0x%x) failed\n", + pci_drv->name, + (unsigned) pci_dev->id.vendor_id, + (unsigned) pci_dev->id.device_id); + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + rte_eth_dev_release_port(eth_dev); + return diag; +} + +static int +rte_eth_dev_uninit(struct rte_pci_device *pci_dev) +{ + const struct eth_driver *eth_drv; + struct rte_eth_dev *eth_dev; + char ethdev_name[RTE_ETH_NAME_MAX_LEN]; + int ret; + + if (pci_dev == NULL) + return -EINVAL; + + /* Create unique Ethernet device name using PCI address */ + rte_eth_dev_create_unique_device_name(ethdev_name, + sizeof(ethdev_name), pci_dev); + + eth_dev = rte_eth_dev_allocated(ethdev_name); + if (eth_dev == NULL) + return -ENODEV; + + eth_drv = (const struct eth_driver *)pci_dev->driver; + + /* Invoke PMD device uninit function */ + if (*eth_drv->eth_dev_uninit) { + ret = (*eth_drv->eth_dev_uninit)(eth_dev); + if (ret) + return ret; + } + + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->pci_dev = NULL; + eth_dev->driver = NULL; + eth_dev->data = NULL; + + return 0; +} + +/** + * Register an Ethernet [Poll Mode] driver. + * + * Function invoked by the initialization function of an Ethernet driver + * to simultaneously register itself as a PCI driver and as an Ethernet + * Poll Mode Driver. + * Invokes the rte_eal_pci_register() function to register the *pci_drv* + * structure embedded in the *eth_drv* structure, after having stored the + * address of the rte_eth_dev_init() function in the *devinit* field of + * the *pci_drv* structure. + * During the PCI probing phase, the rte_eth_dev_init() function is + * invoked for each PCI [Ethernet device] matching the embedded PCI + * identifiers provided by the driver. + */ +void +rte_eth_driver_register(struct eth_driver *eth_drv) +{ + eth_drv->pci_drv.devinit = rte_eth_dev_init; + eth_drv->pci_drv.devuninit = rte_eth_dev_uninit; + rte_eal_pci_register(ð_drv->pci_drv); +} + +int +rte_eth_dev_is_valid_port(uint8_t port_id) +{ + if (port_id >= RTE_MAX_ETHPORTS || + rte_eth_devices[port_id].attached != DEV_ATTACHED) + return 0; + else + return 1; +} + +int +rte_eth_dev_socket_id(uint8_t port_id) +{ + if (!rte_eth_dev_is_valid_port(port_id)) + return -1; + return rte_eth_devices[port_id].data->numa_node; +} + +uint8_t +rte_eth_dev_count(void) +{ + return nb_ports; +} + +static enum rte_eth_dev_type +rte_eth_dev_get_device_type(uint8_t port_id) +{ + if (!rte_eth_dev_is_valid_port(port_id)) + return RTE_ETH_DEV_UNKNOWN; + return rte_eth_devices[port_id].dev_type; +} + +static int +rte_eth_dev_get_addr_by_port(uint8_t port_id, struct rte_pci_addr *addr) +{ + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (addr == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *addr = rte_eth_devices[port_id].pci_dev->addr; + return 0; +} + +static int +rte_eth_dev_get_name_by_port(uint8_t port_id, char *name) +{ + char *tmp; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + /* shouldn't check 'rte_eth_devices[i].data', + * because it might be overwritten by VDEV PMD */ + tmp = rte_eth_dev_data[port_id].name; + strcpy(name, tmp); + return 0; +} + +static int +rte_eth_dev_get_port_by_name(const char *name, uint8_t *port_id) +{ + int i; + + if (name == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *port_id = RTE_MAX_ETHPORTS; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + + if (!strncmp(name, + rte_eth_dev_data[i].name, strlen(name))) { + + *port_id = i; + + return 0; + } + } + return -ENODEV; +} + +static int +rte_eth_dev_get_port_by_addr(const struct rte_pci_addr *addr, uint8_t *port_id) +{ + int i; + struct rte_pci_device *pci_dev = NULL; + + if (addr == NULL) { + RTE_PMD_DEBUG_TRACE("Null pointer is specified\n"); + return -EINVAL; + } + + *port_id = RTE_MAX_ETHPORTS; + + for (i = 0; i < RTE_MAX_ETHPORTS; i++) { + + pci_dev = rte_eth_devices[i].pci_dev; + + if (pci_dev && + !rte_eal_compare_pci_addr(&pci_dev->addr, addr)) { + + *port_id = i; + + return 0; + } + } + return -ENODEV; +} + +static int +rte_eth_dev_is_detachable(uint8_t port_id) +{ + uint32_t dev_flags; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -EINVAL; + } + + switch (rte_eth_devices[port_id].data->kdrv) { + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + case RTE_KDRV_NIC_UIO: + case RTE_KDRV_NONE: + break; + case RTE_KDRV_VFIO: + default: + return -ENOTSUP; + } + dev_flags = rte_eth_devices[port_id].data->dev_flags; + if ((dev_flags & RTE_ETH_DEV_DETACHABLE) && + (!(dev_flags & RTE_ETH_DEV_BONDED_SLAVE))) + return 0; + else + return 1; +} + +/* attach the new physical device, then store port_id of the device */ +static int +rte_eth_dev_attach_pdev(struct rte_pci_addr *addr, uint8_t *port_id) +{ + /* re-construct pci_device_list */ + if (rte_eal_pci_scan()) + goto err; + /* Invoke probe func of the driver can handle the new device. */ + if (rte_eal_pci_probe_one(addr)) + goto err; + + if (rte_eth_dev_get_port_by_addr(addr, port_id)) + goto err; + + return 0; +err: + return -1; +} + +/* detach the new physical device, then store pci_addr of the device */ +static int +rte_eth_dev_detach_pdev(uint8_t port_id, struct rte_pci_addr *addr) +{ + struct rte_pci_addr freed_addr; + struct rte_pci_addr vp; + + /* get pci address by port id */ + if (rte_eth_dev_get_addr_by_port(port_id, &freed_addr)) + goto err; + + /* Zeroed pci addr means the port comes from virtual device */ + vp.domain = vp.bus = vp.devid = vp.function = 0; + if (rte_eal_compare_pci_addr(&vp, &freed_addr) == 0) + goto err; + + /* invoke devuninit func of the pci driver, + * also remove the device from pci_device_list */ + if (rte_eal_pci_detach(&freed_addr)) + goto err; + + *addr = freed_addr; + return 0; +err: + return -1; +} + +/* attach the new virtual device, then store port_id of the device */ +static int +rte_eth_dev_attach_vdev(const char *vdevargs, uint8_t *port_id) +{ + char *name = NULL, *args = NULL; + int ret = -1; + + /* parse vdevargs, then retrieve device name and args */ + if (rte_eal_parse_devargs_str(vdevargs, &name, &args)) + goto end; + + /* walk around dev_driver_list to find the driver of the device, + * then invoke probe function of the driver. + * rte_eal_vdev_init() updates port_id allocated after + * initialization. + */ + if (rte_eal_vdev_init(name, args)) + goto end; + + if (rte_eth_dev_get_port_by_name(name, port_id)) + goto end; + + ret = 0; +end: + free(name); + free(args); + + return ret; +} + +/* detach the new virtual device, then store the name of the device */ +static int +rte_eth_dev_detach_vdev(uint8_t port_id, char *vdevname) +{ + char name[RTE_ETH_NAME_MAX_LEN]; + + /* get device name by port id */ + if (rte_eth_dev_get_name_by_port(port_id, name)) + goto err; + /* walk around dev_driver_list to find the driver of the device, + * then invoke uninit function of the driver */ + if (rte_eal_vdev_uninit(name)) + goto err; + + strncpy(vdevname, name, sizeof(name)); + return 0; +err: + return -1; +} + +/* attach the new device, then store port_id of the device */ +int +rte_eth_dev_attach(const char *devargs, uint8_t *port_id) +{ + struct rte_pci_addr addr; + int ret = -1; + + if ((devargs == NULL) || (port_id == NULL)) { + ret = -EINVAL; + goto err; + } + + if (eal_parse_pci_DomBDF(devargs, &addr) == 0) { + ret = rte_eth_dev_attach_pdev(&addr, port_id); + if (ret < 0) + goto err; + } else { + ret = rte_eth_dev_attach_vdev(devargs, port_id); + if (ret < 0) + goto err; + } + + return 0; +err: + RTE_LOG(ERR, EAL, "Driver, cannot attach the device\n"); + return ret; +} + +/* detach the device, then store the name of the device */ +int +rte_eth_dev_detach(uint8_t port_id, char *name) +{ + struct rte_pci_addr addr; + int ret = -1; + + if (name == NULL) { + ret = -EINVAL; + goto err; + } + + /* check whether the driver supports detach feature, or not */ + if (rte_eth_dev_is_detachable(port_id)) + goto err; + + if (rte_eth_dev_get_device_type(port_id) == RTE_ETH_DEV_PCI) { + ret = rte_eth_dev_get_addr_by_port(port_id, &addr); + if (ret < 0) + goto err; + + ret = rte_eth_dev_detach_pdev(port_id, &addr); + if (ret < 0) + goto err; + + snprintf(name, RTE_ETH_NAME_MAX_LEN, + "%04x:%02x:%02x.%d", + addr.domain, addr.bus, + addr.devid, addr.function); + } else { + ret = rte_eth_dev_detach_vdev(port_id, name); + if (ret < 0) + goto err; + } + + return 0; + +err: + RTE_LOG(ERR, EAL, "Driver, cannot detach the device\n"); + return ret; +} + +static int +rte_eth_dev_rx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_rx_queues; + void **rxq; + unsigned i; + + if (dev->data->rx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->rx_queues = rte_zmalloc("ethdev->rx_queues", + sizeof(dev->data->rx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->rx_queues == NULL) { + dev->data->nb_rx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->rx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + rxq = rte_realloc(rxq, sizeof(rxq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (rxq == NULL) + return -(ENOMEM); + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(rxq + old_nb_queues, 0, + sizeof(rxq[0]) * new_qs); + } + + dev->data->rx_queues = rxq; + + } else if (dev->data->rx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_release, -ENOTSUP); + + rxq = dev->data->rx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->rx_queue_release)(rxq[i]); + } + dev->data->nb_rx_queues = nb_queues; + return 0; +} + +int +rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_start, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + rx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->rx_queue_start(dev, rx_queue_id); + +} + +int +rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_stop, -ENOTSUP); + + if (dev->data->rx_queue_state[rx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + rx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->rx_queue_stop(dev, rx_queue_id); + +} + +int +rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_start, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] != RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already started\n", + tx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->tx_queue_start(dev, tx_queue_id); + +} + +int +rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_stop, -ENOTSUP); + + if (dev->data->tx_queue_state[tx_queue_id] == RTE_ETH_QUEUE_STATE_STOPPED) { + RTE_PMD_DEBUG_TRACE("Queue %" PRIu16" of device with port_id=%" PRIu8 + " already stopped\n", + tx_queue_id, port_id); + return 0; + } + + return dev->dev_ops->tx_queue_stop(dev, tx_queue_id); + +} + +static int +rte_eth_dev_tx_queue_config(struct rte_eth_dev *dev, uint16_t nb_queues) +{ + uint16_t old_nb_queues = dev->data->nb_tx_queues; + void **txq; + unsigned i; + + if (dev->data->tx_queues == NULL && nb_queues != 0) { /* first time configuration */ + dev->data->tx_queues = rte_zmalloc("ethdev->tx_queues", + sizeof(dev->data->tx_queues[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (dev->data->tx_queues == NULL) { + dev->data->nb_tx_queues = 0; + return -(ENOMEM); + } + } else if (dev->data->tx_queues != NULL && nb_queues != 0) { /* re-configure */ + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + txq = rte_realloc(txq, sizeof(txq[0]) * nb_queues, + RTE_CACHE_LINE_SIZE); + if (txq == NULL) + return -ENOMEM; + if (nb_queues > old_nb_queues) { + uint16_t new_qs = nb_queues - old_nb_queues; + + memset(txq + old_nb_queues, 0, + sizeof(txq[0]) * new_qs); + } + + dev->data->tx_queues = txq; + + } else if (dev->data->tx_queues != NULL && nb_queues == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_release, -ENOTSUP); + + txq = dev->data->tx_queues; + + for (i = nb_queues; i < old_nb_queues; i++) + (*dev->dev_ops->tx_queue_release)(txq[i]); + } + dev->data->nb_tx_queues = nb_queues; + return 0; +} + +uint32_t +rte_eth_speed_bitflag(uint32_t speed, int duplex) +{ + switch (speed) { + case ETH_SPEED_NUM_10M: + return duplex ? ETH_LINK_SPEED_10M : ETH_LINK_SPEED_10M_HD; + case ETH_SPEED_NUM_100M: + return duplex ? ETH_LINK_SPEED_100M : ETH_LINK_SPEED_100M_HD; + case ETH_SPEED_NUM_1G: + return ETH_LINK_SPEED_1G; + case ETH_SPEED_NUM_2_5G: + return ETH_LINK_SPEED_2_5G; + case ETH_SPEED_NUM_5G: + return ETH_LINK_SPEED_5G; + case ETH_SPEED_NUM_10G: + return ETH_LINK_SPEED_10G; + case ETH_SPEED_NUM_20G: + return ETH_LINK_SPEED_20G; + case ETH_SPEED_NUM_25G: + return ETH_LINK_SPEED_25G; + case ETH_SPEED_NUM_40G: + return ETH_LINK_SPEED_40G; + case ETH_SPEED_NUM_50G: + return ETH_LINK_SPEED_50G; + case ETH_SPEED_NUM_56G: + return ETH_LINK_SPEED_56G; + case ETH_SPEED_NUM_100G: + return ETH_LINK_SPEED_100G; + default: + return 0; + } +} + +int +rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_q, uint16_t nb_tx_q, + const struct rte_eth_conf *dev_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + if (nb_rx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of RX queues requested (%u) is greater than max supported(%d)\n", + nb_rx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + if (nb_tx_q > RTE_MAX_QUEUES_PER_PORT) { + RTE_PMD_DEBUG_TRACE( + "Number of TX queues requested (%u) is greater than max supported(%d)\n", + nb_tx_q, RTE_MAX_QUEUES_PER_PORT); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_configure, -ENOTSUP); + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + /* Copy the dev_conf parameter into the dev structure */ + memcpy(&dev->data->dev_conf, dev_conf, sizeof(dev->data->dev_conf)); + + /* + * Check that the numbers of RX and TX queues are not greater + * than the maximum number of RX and TX queues supported by the + * configured device. + */ + (*dev->dev_ops->dev_infos_get)(dev, &dev_info); + + if (nb_rx_q == 0 && nb_tx_q == 0) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d both rx and tx queue cannot be 0\n", port_id); + return -EINVAL; + } + + if (nb_rx_q > dev_info.max_rx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_rx_queues=%d > %d\n", + port_id, nb_rx_q, dev_info.max_rx_queues); + return -EINVAL; + } + + if (nb_tx_q > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d nb_tx_queues=%d > %d\n", + port_id, nb_tx_q, dev_info.max_tx_queues); + return -EINVAL; + } + + /* + * If link state interrupt is enabled, check that the + * device supports it. + */ + if ((dev_conf->intr_conf.lsc == 1) && + (!(dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC))) { + RTE_PMD_DEBUG_TRACE("driver %s does not support lsc\n", + dev->data->drv_name); + return -EINVAL; + } + + /* + * If jumbo frames are enabled, check that the maximum RX packet + * length is supported by the configured device. + */ + if (dev_conf->rxmode.jumbo_frame == 1) { + if (dev_conf->rxmode.max_rx_pkt_len > + dev_info.max_rx_pktlen) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " > max valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)dev_info.max_rx_pktlen); + return -EINVAL; + } else if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN) { + RTE_PMD_DEBUG_TRACE("ethdev port_id=%d max_rx_pkt_len %u" + " < min valid value %u\n", + port_id, + (unsigned)dev_conf->rxmode.max_rx_pkt_len, + (unsigned)ETHER_MIN_LEN); + return -EINVAL; + } + } else { + if (dev_conf->rxmode.max_rx_pkt_len < ETHER_MIN_LEN || + dev_conf->rxmode.max_rx_pkt_len > ETHER_MAX_LEN) + /* Use default value */ + dev->data->dev_conf.rxmode.max_rx_pkt_len = + ETHER_MAX_LEN; + } + + /* + * Setup new number of RX/TX queues and reconfigure device. + */ + diag = rte_eth_dev_rx_queue_config(dev, nb_rx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_rx_queue_config = %d\n", + port_id, diag); + return diag; + } + + diag = rte_eth_dev_tx_queue_config(dev, nb_tx_q); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d rte_eth_dev_tx_queue_config = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + return diag; + } + + diag = (*dev->dev_ops->dev_configure)(dev); + if (diag != 0) { + RTE_PMD_DEBUG_TRACE("port%d dev_configure = %d\n", + port_id, diag); + rte_eth_dev_rx_queue_config(dev, 0); + rte_eth_dev_tx_queue_config(dev, 0); + return diag; + } + + return 0; +} + +static void +rte_eth_dev_config_restore(uint8_t port_id) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct ether_addr addr; + uint16_t i; + uint32_t pool = 0; + + dev = &rte_eth_devices[port_id]; + + rte_eth_dev_info_get(port_id, &dev_info); + + if (RTE_ETH_DEV_SRIOV(dev).active) + pool = RTE_ETH_DEV_SRIOV(dev).def_vmdq_idx; + + /* replay MAC address configuration */ + for (i = 0; i < dev_info.max_mac_addrs; i++) { + addr = dev->data->mac_addrs[i]; + + /* skip zero address */ + if (is_zero_ether_addr(&addr)) + continue; + + /* add address to the hardware */ + if (*dev->dev_ops->mac_addr_add && + (dev->data->mac_pool_sel[i] & (1ULL << pool))) + (*dev->dev_ops->mac_addr_add)(dev, &addr, i, pool); + else { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array not supported\n", + port_id); + /* exit the loop but not return an error */ + break; + } + } + + /* replay promiscuous configuration */ + if (rte_eth_promiscuous_get(port_id) == 1) + rte_eth_promiscuous_enable(port_id); + else if (rte_eth_promiscuous_get(port_id) == 0) + rte_eth_promiscuous_disable(port_id); + + /* replay all multicast configuration */ + if (rte_eth_allmulticast_get(port_id) == 1) + rte_eth_allmulticast_enable(port_id); + else if (rte_eth_allmulticast_get(port_id) == 0) + rte_eth_allmulticast_disable(port_id); +} + +int +rte_eth_dev_start(uint8_t port_id) +{ + struct rte_eth_dev *dev; + int diag; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_start, -ENOTSUP); + + if (dev->data->dev_started != 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + " already started\n", + port_id); + return 0; + } + + diag = (*dev->dev_ops->dev_start)(dev); + if (diag == 0) + dev->data->dev_started = 1; + else + return diag; + + rte_eth_dev_config_restore(port_id); + + if (dev->data->dev_conf.intr_conf.lsc == 0) { + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->link_update, -ENOTSUP); + (*dev->dev_ops->link_update)(dev, 0); + } + return 0; +} + +void +rte_eth_dev_stop(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_stop); + + if (dev->data->dev_started == 0) { + RTE_PMD_DEBUG_TRACE("Device with port_id=%" PRIu8 + " already stopped\n", + port_id); + return; + } + + dev->data->dev_started = 0; + (*dev->dev_ops->dev_stop)(dev); +} + +int +rte_eth_dev_set_link_up(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_up, -ENOTSUP); + return (*dev->dev_ops->dev_set_link_up)(dev); +} + +int +rte_eth_dev_set_link_down(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_set_link_down, -ENOTSUP); + return (*dev->dev_ops->dev_set_link_down)(dev); +} + +void +rte_eth_dev_close(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_close); + dev->data->dev_started = 0; + (*dev->dev_ops->dev_close)(dev); + + rte_free(dev->data->rx_queues); + dev->data->rx_queues = NULL; + rte_free(dev->data->tx_queues); + dev->data->tx_queues = NULL; +} + +int +rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp) +{ + int ret; + uint32_t mbp_buf_size; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", rx_queue_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_setup, -ENOTSUP); + + /* + * Check the size of the mbuf data buffer. + * This value must be provided in the private data of the memory pool. + * First check that the memory pool has a valid private data. + */ + rte_eth_dev_info_get(port_id, &dev_info); + if (mp->private_data_size < sizeof(struct rte_pktmbuf_pool_private)) { + RTE_PMD_DEBUG_TRACE("%s private_data_size %d < %d\n", + mp->name, (int) mp->private_data_size, + (int) sizeof(struct rte_pktmbuf_pool_private)); + return -ENOSPC; + } + mbp_buf_size = rte_pktmbuf_data_room_size(mp); + + if ((mbp_buf_size - RTE_PKTMBUF_HEADROOM) < dev_info.min_rx_bufsize) { + RTE_PMD_DEBUG_TRACE("%s mbuf_data_room_size %d < %d " + "(RTE_PKTMBUF_HEADROOM=%d + min_rx_bufsize(dev)" + "=%d)\n", + mp->name, + (int)mbp_buf_size, + (int)(RTE_PKTMBUF_HEADROOM + + dev_info.min_rx_bufsize), + (int)RTE_PKTMBUF_HEADROOM, + (int)dev_info.min_rx_bufsize); + return -EINVAL; + } + + if (nb_rx_desc > dev_info.rx_desc_lim.nb_max || + nb_rx_desc < dev_info.rx_desc_lim.nb_min || + nb_rx_desc % dev_info.rx_desc_lim.nb_align != 0) { + + RTE_PMD_DEBUG_TRACE("Invalid value for nb_rx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_rx_desc, + dev_info.rx_desc_lim.nb_max, + dev_info.rx_desc_lim.nb_min, + dev_info.rx_desc_lim.nb_align); + return -EINVAL; + } + + if (rx_conf == NULL) + rx_conf = &dev_info.default_rxconf; + + ret = (*dev->dev_ops->rx_queue_setup)(dev, rx_queue_id, nb_rx_desc, + socket_id, rx_conf, mp); + if (!ret) { + if (!dev->data->min_rx_buf_size || + dev->data->min_rx_buf_size > mbp_buf_size) + dev->data->min_rx_buf_size = mbp_buf_size; + } + + return ret; +} + +int +rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + if (tx_queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", tx_queue_id); + return -EINVAL; + } + + if (dev->data->dev_started) { + RTE_PMD_DEBUG_TRACE( + "port %d must be stopped to allow configuration\n", port_id); + return -EBUSY; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_infos_get, -ENOTSUP); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->tx_queue_setup, -ENOTSUP); + + rte_eth_dev_info_get(port_id, &dev_info); + + if (nb_tx_desc > dev_info.tx_desc_lim.nb_max || + nb_tx_desc < dev_info.tx_desc_lim.nb_min || + nb_tx_desc % dev_info.tx_desc_lim.nb_align != 0) { + RTE_PMD_DEBUG_TRACE("Invalid value for nb_tx_desc(=%hu), " + "should be: <= %hu, = %hu, and a product of %hu\n", + nb_tx_desc, + dev_info.tx_desc_lim.nb_max, + dev_info.tx_desc_lim.nb_min, + dev_info.tx_desc_lim.nb_align); + return -EINVAL; + } + + if (tx_conf == NULL) + tx_conf = &dev_info.default_txconf; + + return (*dev->dev_ops->tx_queue_setup)(dev, tx_queue_id, nb_tx_desc, + socket_id, tx_conf); +} + +void +rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata __rte_unused) +{ + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); +} + +void +rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata) +{ + uint64_t *count = userdata; + unsigned i; + + for (i = 0; i < unsent; i++) + rte_pktmbuf_free(pkts[i]); + + *count += unsent; +} + +int +rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, + buffer_tx_error_fn cbfn, void *userdata) +{ + buffer->error_callback = cbfn; + buffer->error_userdata = userdata; + return 0; +} + +int +rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size) +{ + int ret = 0; + + if (buffer == NULL) + return -EINVAL; + + buffer->size = size; + if (buffer->error_callback == NULL) { + ret = rte_eth_tx_buffer_set_err_callback( + buffer, rte_eth_tx_buffer_drop_callback, NULL); + } + + return ret; +} + +void +rte_eth_promiscuous_enable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_enable); + (*dev->dev_ops->promiscuous_enable)(dev); + dev->data->promiscuous = 1; +} + +void +rte_eth_promiscuous_disable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->promiscuous_disable); + dev->data->promiscuous = 0; + (*dev->dev_ops->promiscuous_disable)(dev); +} + +int +rte_eth_promiscuous_get(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->promiscuous; +} + +void +rte_eth_allmulticast_enable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_enable); + (*dev->dev_ops->allmulticast_enable)(dev); + dev->data->all_multicast = 1; +} + +void +rte_eth_allmulticast_disable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->allmulticast_disable); + dev->data->all_multicast = 0; + (*dev->dev_ops->allmulticast_disable)(dev); +} + +int +rte_eth_allmulticast_get(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + return dev->data->all_multicast; +} + +static inline int +rte_eth_dev_atomic_read_link_status(struct rte_eth_dev *dev, + struct rte_eth_link *link) +{ + struct rte_eth_link *dst = link; + struct rte_eth_link *src = &(dev->data->dev_link); + + if (rte_atomic64_cmpset((uint64_t *)dst, *(uint64_t *)dst, + *(uint64_t *)src) == 0) + return -1; + + return 0; +} + +void +rte_eth_link_get(uint8_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + rte_eth_dev_atomic_read_link_status(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 1); + *eth_link = dev->data->dev_link; + } +} + +void +rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *eth_link) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.intr_conf.lsc != 0) + rte_eth_dev_atomic_read_link_status(dev, eth_link); + else { + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->link_update); + (*dev->dev_ops->link_update)(dev, 0); + *eth_link = dev->data->dev_link; + } +} + +int +rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + memset(stats, 0, sizeof(*stats)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->stats_get, -ENOTSUP); + (*dev->dev_ops->stats_get)(dev, stats); + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return 0; +} + +void +rte_eth_stats_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->stats_reset); + (*dev->dev_ops->stats_reset)(dev); + dev->data->rx_mbuf_alloc_failed = 0; +} + +/* retrieve ethdev extended statistics */ +int +rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats, + unsigned n) +{ + struct rte_eth_stats eth_stats; + struct rte_eth_dev *dev; + unsigned count = 0, i, q; + signed xcount = 0; + uint64_t val, *stats_ptr; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + + /* Return generic statistics */ + count = RTE_NB_STATS + (dev->data->nb_rx_queues * RTE_NB_RXQ_STATS) + + (dev->data->nb_tx_queues * RTE_NB_TXQ_STATS); + + /* implemented by the driver */ + if (dev->dev_ops->xstats_get != NULL) { + /* Retrieve the xstats from the driver at the end of the + * xstats struct. + */ + xcount = (*dev->dev_ops->xstats_get)(dev, + xstats ? xstats + count : NULL, + (n > count) ? n - count : 0); + + if (xcount < 0) + return xcount; + } + + if (n < count + xcount || xstats == NULL) + return count + xcount; + + /* now fill the xstats structure */ + count = 0; + rte_eth_stats_get(port_id, ð_stats); + + /* global stats */ + for (i = 0; i < RTE_NB_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_stats_strings[i].offset); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "%s", rte_stats_strings[i].name); + xstats[count++].value = val; + } + + /* per-rxq stats */ + for (q = 0; q < dev->data->nb_rx_queues; q++) { + for (i = 0; i < RTE_NB_RXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_rxq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "rx_q%u_%s", q, + rte_rxq_stats_strings[i].name); + xstats[count++].value = val; + } + } + + /* per-txq stats */ + for (q = 0; q < dev->data->nb_tx_queues; q++) { + for (i = 0; i < RTE_NB_TXQ_STATS; i++) { + stats_ptr = RTE_PTR_ADD(ð_stats, + rte_txq_stats_strings[i].offset + + q * sizeof(uint64_t)); + val = *stats_ptr; + snprintf(xstats[count].name, sizeof(xstats[count].name), + "tx_q%u_%s", q, + rte_txq_stats_strings[i].name); + xstats[count++].value = val; + } + } + + return count + xcount; +} + +/* reset ethdev extended statistics */ +void +rte_eth_xstats_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + /* implemented by the driver */ + if (dev->dev_ops->xstats_reset != NULL) { + (*dev->dev_ops->xstats_reset)(dev); + return; + } + + /* fallback to default */ + rte_eth_stats_reset(port_id); +} + +static int +set_queue_stats_mapping(uint8_t port_id, uint16_t queue_id, uint8_t stat_idx, + uint8_t is_rx) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->queue_stats_mapping_set, -ENOTSUP); + return (*dev->dev_ops->queue_stats_mapping_set) + (dev, queue_id, stat_idx, is_rx); +} + + +int +rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, uint16_t tx_queue_id, + uint8_t stat_idx) +{ + return set_queue_stats_mapping(port_id, tx_queue_id, stat_idx, + STAT_QMAP_TX); +} + + +int +rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, uint16_t rx_queue_id, + uint8_t stat_idx) +{ + return set_queue_stats_mapping(port_id, rx_queue_id, stat_idx, + STAT_QMAP_RX); +} + + +void +rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info) +{ + struct rte_eth_dev *dev; + const struct rte_eth_desc_lim lim = { + .nb_max = UINT16_MAX, + .nb_min = 0, + .nb_align = 1, + }; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + + memset(dev_info, 0, sizeof(struct rte_eth_dev_info)); + dev_info->rx_desc_lim = lim; + dev_info->tx_desc_lim = lim; + + RTE_FUNC_PTR_OR_RET(*dev->dev_ops->dev_infos_get); + (*dev->dev_ops->dev_infos_get)(dev, dev_info); + dev_info->pci_dev = dev->pci_dev; + dev_info->driver_name = dev->data->drv_name; +} + +int +rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, + uint32_t *ptypes, int num) +{ + int i, j; + struct rte_eth_dev *dev; + const uint32_t *all_ptypes; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_supported_ptypes_get, 0); + all_ptypes = (*dev->dev_ops->dev_supported_ptypes_get)(dev); + + if (!all_ptypes) + return 0; + + for (i = 0, j = 0; all_ptypes[i] != RTE_PTYPE_UNKNOWN; ++i) + if (all_ptypes[i] & ptype_mask) { + if (j < num) + ptypes[j] = all_ptypes[i]; + j++; + } + + return j; +} + +void +rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_RET(port_id); + dev = &rte_eth_devices[port_id]; + ether_addr_copy(&dev->data->mac_addrs[0], mac_addr); +} + + +int +rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + *mtu = dev->data->mtu; + return 0; +} + +int +rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu) +{ + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mtu_set, -ENOTSUP); + + ret = (*dev->dev_ops->mtu_set)(dev, mtu); + if (!ret) + dev->data->mtu = mtu; + + return ret; +} + +int +rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (!(dev->data->dev_conf.rxmode.hw_vlan_filter)) { + RTE_PMD_DEBUG_TRACE("port %d: vlan-filtering disabled\n", port_id); + return -ENOSYS; + } + + if (vlan_id > 4095) { + RTE_PMD_DEBUG_TRACE("(port_id=%d) invalid vlan_id=%u > 4095\n", + port_id, (unsigned) vlan_id); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_filter_set, -ENOTSUP); + + return (*dev->dev_ops->vlan_filter_set)(dev, vlan_id, on); +} + +int +rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + if (rx_queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid rx_queue_id=%d\n", port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_strip_queue_set, -ENOTSUP); + (*dev->dev_ops->vlan_strip_queue_set)(dev, rx_queue_id, on); + + return 0; +} + +int +rte_eth_dev_set_vlan_ether_type(uint8_t port_id, + enum rte_vlan_type vlan_type, + uint16_t tpid) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_tpid_set, -ENOTSUP); + + return (*dev->dev_ops->vlan_tpid_set)(dev, vlan_type, tpid); +} + +int +rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask) +{ + struct rte_eth_dev *dev; + int ret = 0; + int mask = 0; + int cur, org = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + /*check which option changed by application*/ + cur = !!(offload_mask & ETH_VLAN_STRIP_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.hw_vlan_strip); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_strip = (uint8_t)cur; + mask |= ETH_VLAN_STRIP_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_FILTER_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.hw_vlan_filter); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_filter = (uint8_t)cur; + mask |= ETH_VLAN_FILTER_MASK; + } + + cur = !!(offload_mask & ETH_VLAN_EXTEND_OFFLOAD); + org = !!(dev->data->dev_conf.rxmode.hw_vlan_extend); + if (cur != org) { + dev->data->dev_conf.rxmode.hw_vlan_extend = (uint8_t)cur; + mask |= ETH_VLAN_EXTEND_MASK; + } + + /*no change*/ + if (mask == 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_offload_set, -ENOTSUP); + (*dev->dev_ops->vlan_offload_set)(dev, mask); + + return ret; +} + +int +rte_eth_dev_get_vlan_offload(uint8_t port_id) +{ + struct rte_eth_dev *dev; + int ret = 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (dev->data->dev_conf.rxmode.hw_vlan_strip) + ret |= ETH_VLAN_STRIP_OFFLOAD; + + if (dev->data->dev_conf.rxmode.hw_vlan_filter) + ret |= ETH_VLAN_FILTER_OFFLOAD; + + if (dev->data->dev_conf.rxmode.hw_vlan_extend) + ret |= ETH_VLAN_EXTEND_OFFLOAD; + + return ret; +} + +int +rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->vlan_pvid_set, -ENOTSUP); + (*dev->dev_ops->vlan_pvid_set)(dev, pvid, on); + + return 0; +} + +int +rte_eth_dev_flow_ctrl_get(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_get, -ENOTSUP); + memset(fc_conf, 0, sizeof(*fc_conf)); + return (*dev->dev_ops->flow_ctrl_get)(dev, fc_conf); +} + +int +rte_eth_dev_flow_ctrl_set(uint8_t port_id, struct rte_eth_fc_conf *fc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if ((fc_conf->send_xon != 0) && (fc_conf->send_xon != 1)) { + RTE_PMD_DEBUG_TRACE("Invalid send_xon, only 0/1 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->flow_ctrl_set, -ENOTSUP); + return (*dev->dev_ops->flow_ctrl_set)(dev, fc_conf); +} + +int +rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, struct rte_eth_pfc_conf *pfc_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (pfc_conf->priority > (ETH_DCB_NUM_USER_PRIORITIES - 1)) { + RTE_PMD_DEBUG_TRACE("Invalid priority, only 0-7 allowed\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + /* High water, low water validation are device specific */ + if (*dev->dev_ops->priority_flow_ctrl_set) + return (*dev->dev_ops->priority_flow_ctrl_set)(dev, pfc_conf); + return -ENOTSUP; +} + +static int +rte_eth_check_reta_mask(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + uint16_t i, num; + + if (!reta_conf) + return -EINVAL; + + if (reta_size != RTE_ALIGN(reta_size, RTE_RETA_GROUP_SIZE)) { + RTE_PMD_DEBUG_TRACE("Invalid reta size, should be %u aligned\n", + RTE_RETA_GROUP_SIZE); + return -EINVAL; + } + + num = reta_size / RTE_RETA_GROUP_SIZE; + for (i = 0; i < num; i++) { + if (reta_conf[i].mask) + return 0; + } + + return -EINVAL; +} + +static int +rte_eth_check_reta_entry(struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size, + uint16_t max_rxq) +{ + uint16_t i, idx, shift; + + if (!reta_conf) + return -EINVAL; + + if (max_rxq == 0) { + RTE_PMD_DEBUG_TRACE("No receive queue is available\n"); + return -EINVAL; + } + + for (i = 0; i < reta_size; i++) { + idx = i / RTE_RETA_GROUP_SIZE; + shift = i % RTE_RETA_GROUP_SIZE; + if ((reta_conf[idx].mask & (1ULL << shift)) && + (reta_conf[idx].reta[shift] >= max_rxq)) { + RTE_PMD_DEBUG_TRACE("reta_conf[%u]->reta[%u]: %u exceeds " + "the maximum rxq index: %u\n", idx, shift, + reta_conf[idx].reta[shift], max_rxq); + return -EINVAL; + } + } + + return 0; +} + +int +rte_eth_dev_rss_reta_update(uint8_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + + /* Check entry value */ + ret = rte_eth_check_reta_entry(reta_conf, reta_size, + dev->data->nb_rx_queues); + if (ret < 0) + return ret; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_update, -ENOTSUP); + return (*dev->dev_ops->reta_update)(dev, reta_conf, reta_size); +} + +int +rte_eth_dev_rss_reta_query(uint8_t port_id, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size) +{ + struct rte_eth_dev *dev; + int ret; + + if (port_id >= nb_ports) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + /* Check mask bits */ + ret = rte_eth_check_reta_mask(reta_conf, reta_size); + if (ret < 0) + return ret; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->reta_query, -ENOTSUP); + return (*dev->dev_ops->reta_query)(dev, reta_conf, reta_size); +} + +int +rte_eth_dev_rss_hash_update(uint8_t port_id, struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + uint16_t rss_hash_protos; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + rss_hash_protos = rss_conf->rss_hf; + if ((rss_hash_protos != 0) && + ((rss_hash_protos & ETH_RSS_PROTO_MASK) == 0)) { + RTE_PMD_DEBUG_TRACE("Invalid rss_hash_protos=0x%x\n", + rss_hash_protos); + return -EINVAL; + } + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_update, -ENOTSUP); + return (*dev->dev_ops->rss_hash_update)(dev, rss_conf); +} + +int +rte_eth_dev_rss_hash_conf_get(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rss_hash_conf_get, -ENOTSUP); + return (*dev->dev_ops->rss_hash_conf_get)(dev, rss_conf); +} + +int +rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_add, -ENOTSUP); + return (*dev->dev_ops->udp_tunnel_port_add)(dev, udp_tunnel); +} + +int +rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, + struct rte_eth_udp_tunnel *udp_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + if (udp_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid udp_tunnel parameter\n"); + return -EINVAL; + } + + if (udp_tunnel->prot_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->udp_tunnel_port_del, -ENOTSUP); + return (*dev->dev_ops->udp_tunnel_port_del)(dev, udp_tunnel); +} + +int +rte_eth_led_on(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_on, -ENOTSUP); + return (*dev->dev_ops->dev_led_on)(dev); +} + +int +rte_eth_led_off(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->dev_led_off, -ENOTSUP); + return (*dev->dev_ops->dev_led_off)(dev); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. + */ +static int +get_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + + for (i = 0; i < dev_info.max_mac_addrs; i++) + if (memcmp(addr, &dev->data->mac_addrs[i], ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +static const struct ether_addr null_mac_addr; + +int +rte_eth_dev_mac_addr_add(uint8_t port_id, struct ether_addr *addr, + uint32_t pool) +{ + struct rte_eth_dev *dev; + int index; + uint64_t pool_mask; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_add, -ENOTSUP); + + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + if (pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("pool id must be 0-%d\n", ETH_64_POOLS - 1); + return -EINVAL; + } + + index = get_mac_addr_index(port_id, addr); + if (index < 0) { + index = get_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } else { + pool_mask = dev->data->mac_pool_sel[index]; + + /* Check if both MAC address and pool is already there, and do nothing */ + if (pool_mask & (1ULL << pool)) + return 0; + } + + /* Update NIC */ + (*dev->dev_ops->mac_addr_add)(dev, addr, index, pool); + + /* Update address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[index]); + + /* Update pool bitmap in NIC data structure */ + dev->data->mac_pool_sel[index] |= (1ULL << pool); + + return 0; +} + +int +rte_eth_dev_mac_addr_remove(uint8_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + int index; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_remove, -ENOTSUP); + + index = get_mac_addr_index(port_id, addr); + if (index == 0) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot remove default MAC address\n", port_id); + return -EADDRINUSE; + } else if (index < 0) + return 0; /* Do nothing if address wasn't found */ + + /* Update NIC */ + (*dev->dev_ops->mac_addr_remove)(dev, index); + + /* Update address in NIC data structure */ + ether_addr_copy(&null_mac_addr, &dev->data->mac_addrs[index]); + + /* reset pool bitmap */ + dev->data->mac_pool_sel[index] = 0; + + return 0; +} + +int +rte_eth_dev_default_mac_addr_set(uint8_t port_id, struct ether_addr *addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (!is_valid_assigned_ether_addr(addr)) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mac_addr_set, -ENOTSUP); + + /* Update default address in NIC data structure */ + ether_addr_copy(addr, &dev->data->mac_addrs[0]); + + (*dev->dev_ops->mac_addr_set)(dev, addr); + + return 0; +} + +int +rte_eth_dev_set_vf_rxmode(uint8_t port_id, uint16_t vf, + uint16_t rx_mode, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("set VF RX mode:invalid VF id %d\n", vf); + return -EINVAL; + } + + if (rx_mode == 0) { + RTE_PMD_DEBUG_TRACE("set VF RX mode:mode mask ca not be zero\n"); + return -EINVAL; + } + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx_mode, -ENOTSUP); + return (*dev->dev_ops->set_vf_rx_mode)(dev, vf, rx_mode, on); +} + +/* + * Returns index into MAC address array of addr. Use 00:00:00:00:00:00 to find + * an empty spot. + */ +static int +get_hash_mac_addr_index(uint8_t port_id, const struct ether_addr *addr) +{ + struct rte_eth_dev_info dev_info; + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + unsigned i; + + rte_eth_dev_info_get(port_id, &dev_info); + if (!dev->data->hash_mac_addrs) + return -1; + + for (i = 0; i < dev_info.max_hash_mac_addrs; i++) + if (memcmp(addr, &dev->data->hash_mac_addrs[i], + ETHER_ADDR_LEN) == 0) + return i; + + return -1; +} + +int +rte_eth_dev_uc_hash_table_set(uint8_t port_id, struct ether_addr *addr, + uint8_t on) +{ + int index; + int ret; + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + if (is_zero_ether_addr(addr)) { + RTE_PMD_DEBUG_TRACE("port %d: Cannot add NULL MAC address\n", + port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, addr); + /* Check if it's already there, and do nothing */ + if ((index >= 0) && (on)) + return 0; + + if (index < 0) { + if (!on) { + RTE_PMD_DEBUG_TRACE("port %d: the MAC address was not " + "set in UTA\n", port_id); + return -EINVAL; + } + + index = get_hash_mac_addr_index(port_id, &null_mac_addr); + if (index < 0) { + RTE_PMD_DEBUG_TRACE("port %d: MAC address array full\n", + port_id); + return -ENOSPC; + } + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_hash_table_set, -ENOTSUP); + ret = (*dev->dev_ops->uc_hash_table_set)(dev, addr, on); + if (ret == 0) { + /* Update address in NIC data structure */ + if (on) + ether_addr_copy(addr, + &dev->data->hash_mac_addrs[index]); + else + ether_addr_copy(&null_mac_addr, + &dev->data->hash_mac_addrs[index]); + } + + return ret; +} + +int +rte_eth_dev_uc_all_hash_table_set(uint8_t port_id, uint8_t on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->uc_all_hash_table_set, -ENOTSUP); + return (*dev->dev_ops->uc_all_hash_table_set)(dev, on); +} + +int +rte_eth_dev_set_vf_rx(uint8_t port_id, uint16_t vf, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("port %d: invalid vf id\n", port_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rx, -ENOTSUP); + return (*dev->dev_ops->set_vf_rx)(dev, vf, on); +} + +int +rte_eth_dev_set_vf_tx(uint8_t port_id, uint16_t vf, uint8_t on) +{ + uint16_t num_vfs; + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + + num_vfs = dev_info.max_vfs; + if (vf > num_vfs) { + RTE_PMD_DEBUG_TRACE("set pool tx:invalid pool id=%d\n", vf); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_tx, -ENOTSUP); + return (*dev->dev_ops->set_vf_tx)(dev, vf, on); +} + +int +rte_eth_dev_set_vf_vlan_filter(uint8_t port_id, uint16_t vlan_id, + uint64_t vf_mask, uint8_t vlan_on) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + if (vlan_id > ETHER_MAX_VLAN_ID) { + RTE_PMD_DEBUG_TRACE("VF VLAN filter:invalid VLAN id=%d\n", + vlan_id); + return -EINVAL; + } + + if (vf_mask == 0) { + RTE_PMD_DEBUG_TRACE("VF VLAN filter:pool_mask can not be 0\n"); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_vlan_filter, -ENOTSUP); + return (*dev->dev_ops->set_vf_vlan_filter)(dev, vlan_id, + vf_mask, vlan_on); +} + +int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, + uint16_t tx_rate) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (queue_idx > dev_info.max_tx_queues) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:port %d: " + "invalid queue id=%d\n", port_id, queue_idx); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_PMD_DEBUG_TRACE("set queue rate limit:invalid tx_rate=%d, " + "bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_queue_rate_limit, -ENOTSUP); + return (*dev->dev_ops->set_queue_rate_limit)(dev, queue_idx, tx_rate); +} + +int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, uint16_t tx_rate, + uint64_t q_msk) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_info dev_info; + struct rte_eth_link link; + + if (q_msk == 0) + return 0; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + rte_eth_dev_info_get(port_id, &dev_info); + link = dev->data->dev_link; + + if (vf > dev_info.max_vfs) { + RTE_PMD_DEBUG_TRACE("set VF rate limit:port %d: " + "invalid vf id=%d\n", port_id, vf); + return -EINVAL; + } + + if (tx_rate > link.link_speed) { + RTE_PMD_DEBUG_TRACE("set VF rate limit:invalid tx_rate=%d, " + "bigger than link speed= %d\n", + tx_rate, link.link_speed); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_vf_rate_limit, -ENOTSUP); + return (*dev->dev_ops->set_vf_rate_limit)(dev, vf, tx_rate, q_msk); +} + +int +rte_eth_mirror_rule_set(uint8_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, uint8_t on) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (mirror_conf->rule_type == 0) { + RTE_PMD_DEBUG_TRACE("mirror rule type can not be 0.\n"); + return -EINVAL; + } + + if (mirror_conf->dst_pool >= ETH_64_POOLS) { + RTE_PMD_DEBUG_TRACE("Invalid dst pool, pool id must be 0-%d\n", + ETH_64_POOLS - 1); + return -EINVAL; + } + + if ((mirror_conf->rule_type & (ETH_MIRROR_VIRTUAL_POOL_UP | + ETH_MIRROR_VIRTUAL_POOL_DOWN)) && + (mirror_conf->pool_mask == 0)) { + RTE_PMD_DEBUG_TRACE("Invalid mirror pool, pool mask can not be 0.\n"); + return -EINVAL; + } + + if ((mirror_conf->rule_type & ETH_MIRROR_VLAN) && + mirror_conf->vlan.vlan_mask == 0) { + RTE_PMD_DEBUG_TRACE("Invalid vlan mask, vlan mask can not be 0.\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_set, -ENOTSUP); + + return (*dev->dev_ops->mirror_rule_set)(dev, mirror_conf, rule_id, on); +} + +int +rte_eth_mirror_rule_reset(uint8_t port_id, uint8_t rule_id) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->mirror_rule_reset, -ENOTSUP); + + return (*dev->dev_ops->mirror_rule_reset)(dev, rule_id); +} + +int +rte_eth_dev_callback_register(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *user_cb; + + if (!cb_fn) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + TAILQ_FOREACH(user_cb, &(dev->link_intr_cbs), next) { + if (user_cb->cb_fn == cb_fn && + user_cb->cb_arg == cb_arg && + user_cb->event == event) { + break; + } + } + + /* create a new callback. */ + if (user_cb == NULL) + user_cb = rte_zmalloc("INTR_USER_CALLBACK", + sizeof(struct rte_eth_dev_callback), 0); + if (user_cb != NULL) { + user_cb->cb_fn = cb_fn; + user_cb->cb_arg = cb_arg; + user_cb->event = event; + TAILQ_INSERT_TAIL(&(dev->link_intr_cbs), user_cb, next); + } + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return (user_cb == NULL) ? -ENOMEM : 0; +} + +int +rte_eth_dev_callback_unregister(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_eth_dev *dev; + struct rte_eth_dev_callback *cb, *next; + + if (!cb_fn) + return -EINVAL; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + + dev = &rte_eth_devices[port_id]; + rte_spinlock_lock(&rte_eth_dev_cb_lock); + + ret = 0; + for (cb = TAILQ_FIRST(&dev->link_intr_cbs); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn != cb_fn || cb->event != event || + (cb->cb_arg != (void *)-1 && + cb->cb_arg != cb_arg)) + continue; + + /* + * if this callback is not executing right now, + * then remove it. + */ + if (cb->active == 0) { + TAILQ_REMOVE(&(dev->link_intr_cbs), cb, next); + rte_free(cb); + } else { + ret = -EAGAIN; + } + } + + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + return ret; +} + +void +_rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event) +{ + struct rte_eth_dev_callback *cb_lst; + struct rte_eth_dev_callback dev_cb; + + rte_spinlock_lock(&rte_eth_dev_cb_lock); + TAILQ_FOREACH(cb_lst, &(dev->link_intr_cbs), next) { + if (cb_lst->cb_fn == NULL || cb_lst->event != event) + continue; + dev_cb = *cb_lst; + cb_lst->active = 1; + rte_spinlock_unlock(&rte_eth_dev_cb_lock); + dev_cb.cb_fn(dev->data->port_id, dev_cb.event, + dev_cb.cb_arg); + rte_spinlock_lock(&rte_eth_dev_cb_lock); + cb_lst->active = 0; + } + rte_spinlock_unlock(&rte_eth_dev_cb_lock); +} + +int +rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + uint16_t qid; + int rc; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + intr_handle = &dev->pci_dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); + return -EPERM; + } + + for (qid = 0; qid < dev->data->nb_rx_queues; qid++) { + vec = intr_handle->intr_vec[qid]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, qid, op, epfd, vec); + } + } + + return 0; +} + +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *dev, const char *ring_name, + uint16_t queue_id, size_t size, unsigned align, + int socket_id) +{ + char z_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + + snprintf(z_name, sizeof(z_name), "%s_%s_%d_%d", + dev->driver->pci_drv.name, ring_name, + dev->data->port_id, queue_id); + + mz = rte_memzone_lookup(z_name); + if (mz) + return mz; + + if (rte_xen_dom0_supported()) + return rte_memzone_reserve_bounded(z_name, size, socket_id, + 0, align, RTE_PGSIZE_2M); + else + return rte_memzone_reserve_aligned(z_name, size, socket_id, + 0, align); +} + +int +rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, + int epfd, int op, void *data) +{ + uint32_t vec; + struct rte_eth_dev *dev; + struct rte_intr_handle *intr_handle; + int rc; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%u\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%u\n", queue_id); + return -EINVAL; + } + + intr_handle = &dev->pci_dev->intr_handle; + if (!intr_handle->intr_vec) { + RTE_PMD_DEBUG_TRACE("RX Intr vector unset\n"); + return -EPERM; + } + + vec = intr_handle->intr_vec[queue_id]; + rc = rte_intr_rx_ctl(intr_handle, epfd, op, vec, data); + if (rc && rc != -EEXIST) { + RTE_PMD_DEBUG_TRACE("p %u q %u rx ctl error" + " op %d epfd %d vec %u\n", + port_id, queue_id, op, epfd, vec); + return rc; + } + + return 0; +} + +int +rte_eth_dev_rx_intr_enable(uint8_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_enable, -ENOTSUP); + return (*dev->dev_ops->rx_queue_intr_enable)(dev, queue_id); +} + +int +rte_eth_dev_rx_intr_disable(uint8_t port_id, + uint16_t queue_id) +{ + struct rte_eth_dev *dev; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_intr_disable, -ENOTSUP); + return (*dev->dev_ops->rx_queue_intr_disable)(dev, queue_id); +} + +#ifdef RTE_NIC_BYPASS +int rte_eth_dev_bypass_init(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_init, -ENOTSUP); + (*dev->dev_ops->bypass_init)(dev); + return 0; +} + +int +rte_eth_dev_bypass_state_show(uint8_t port_id, uint32_t *state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP); + (*dev->dev_ops->bypass_state_show)(dev, state); + return 0; +} + +int +rte_eth_dev_bypass_state_set(uint8_t port_id, uint32_t *new_state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_set, -ENOTSUP); + (*dev->dev_ops->bypass_state_set)(dev, new_state); + return 0; +} + +int +rte_eth_dev_bypass_event_show(uint8_t port_id, uint32_t event, uint32_t *state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_state_show, -ENOTSUP); + (*dev->dev_ops->bypass_event_show)(dev, event, state); + return 0; +} + +int +rte_eth_dev_bypass_event_store(uint8_t port_id, uint32_t event, uint32_t state) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_event_set, -ENOTSUP); + (*dev->dev_ops->bypass_event_set)(dev, event, state); + return 0; +} + +int +rte_eth_dev_wd_timeout_store(uint8_t port_id, uint32_t timeout) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_set, -ENOTSUP); + (*dev->dev_ops->bypass_wd_timeout_set)(dev, timeout); + return 0; +} + +int +rte_eth_dev_bypass_ver_show(uint8_t port_id, uint32_t *ver) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_ver_show, -ENOTSUP); + (*dev->dev_ops->bypass_ver_show)(dev, ver); + return 0; +} + +int +rte_eth_dev_bypass_wd_timeout_show(uint8_t port_id, uint32_t *wd_timeout) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_timeout_show, -ENOTSUP); + (*dev->dev_ops->bypass_wd_timeout_show)(dev, wd_timeout); + return 0; +} + +int +rte_eth_dev_bypass_wd_reset(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->bypass_wd_reset, -ENOTSUP); + (*dev->dev_ops->bypass_wd_reset)(dev); + return 0; +} +#endif + +int +rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, + RTE_ETH_FILTER_NOP, NULL); +} + +int +rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->filter_ctrl, -ENOTSUP); + return (*dev->dev_ops->filter_ctrl)(dev, filter_type, filter_op, arg); +} + +void * +rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.rx = fn; + cb->param = user_param; + + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].post_rx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + + return cb; +} + +void * +rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + rte_errno = ENOTSUP; + return NULL; +#endif + /* check input parameters */ + if (!rte_eth_dev_is_valid_port(port_id) || fn == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + rte_errno = EINVAL; + return NULL; + } + + struct rte_eth_rxtx_callback *cb = rte_zmalloc(NULL, sizeof(*cb), 0); + + if (cb == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + cb->fn.tx = fn; + cb->param = user_param; + + /* Add the callbacks in fifo order. */ + struct rte_eth_rxtx_callback *tail = + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id]; + + if (!tail) { + rte_eth_devices[port_id].pre_tx_burst_cbs[queue_id] = cb; + + } else { + while (tail->next) + tail = tail->next; + tail->next = cb; + } + + return cb; +} + +int +rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_rx_queues) { + return -EINVAL; + } + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; + struct rte_eth_rxtx_callback *prev_cb; + + /* Reset head pointer and remove user cb if first in the list. */ + if (cb == user_cb) { + dev->post_rx_burst_cbs[queue_id] = user_cb->next; + return 0; + } + + /* Remove the user cb from the callback list. */ + do { + prev_cb = cb; + cb = cb->next; + + if (cb == user_cb) { + prev_cb->next = user_cb->next; + return 0; + } + + } while (cb != NULL); + + /* Callback wasn't found. */ + return -EINVAL; +} + +int +rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb) +{ +#ifndef RTE_ETHDEV_RXTX_CALLBACKS + return -ENOTSUP; +#endif + /* Check input parameters. */ + if (!rte_eth_dev_is_valid_port(port_id) || user_cb == NULL || + queue_id >= rte_eth_devices[port_id].data->nb_tx_queues) { + return -EINVAL; + } + + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id]; + struct rte_eth_rxtx_callback *prev_cb; + + /* Reset head pointer and remove user cb if first in the list. */ + if (cb == user_cb) { + dev->pre_tx_burst_cbs[queue_id] = user_cb->next; + return 0; + } + + /* Remove the user cb from the callback list. */ + do { + prev_cb = cb; + cb = cb->next; + + if (cb == user_cb) { + prev_cb->next = user_cb->next; + return 0; + } + + } while (cb != NULL); + + /* Callback wasn't found. */ + return -EINVAL; +} + +int +rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rxq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->rxq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (qinfo == NULL) + return -EINVAL; + + dev = &rte_eth_devices[port_id]; + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return -EINVAL; + } + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->txq_info_get, -ENOTSUP); + + memset(qinfo, 0, sizeof(*qinfo)); + dev->dev_ops->txq_info_get(dev, queue_id, qinfo); + return 0; +} + +int +rte_eth_dev_set_mc_addr_list(uint8_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_mc_addr_list, -ENOTSUP); + return dev->dev_ops->set_mc_addr_list(dev, mc_addr_set, nb_mc_addr); +} + +int +rte_eth_timesync_enable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_enable, -ENOTSUP); + return (*dev->dev_ops->timesync_enable)(dev); +} + +int +rte_eth_timesync_disable(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_disable, -ENOTSUP); + return (*dev->dev_ops->timesync_disable)(dev); +} + +int +rte_eth_timesync_read_rx_timestamp(uint8_t port_id, struct timespec *timestamp, + uint32_t flags) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_rx_timestamp, -ENOTSUP); + return (*dev->dev_ops->timesync_read_rx_timestamp)(dev, timestamp, flags); +} + +int +rte_eth_timesync_read_tx_timestamp(uint8_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_tx_timestamp, -ENOTSUP); + return (*dev->dev_ops->timesync_read_tx_timestamp)(dev, timestamp); +} + +int +rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_adjust_time, -ENOTSUP); + return (*dev->dev_ops->timesync_adjust_time)(dev, delta); +} + +int +rte_eth_timesync_read_time(uint8_t port_id, struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_read_time, -ENOTSUP); + return (*dev->dev_ops->timesync_read_time)(dev, timestamp); +} + +int +rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *timestamp) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + dev = &rte_eth_devices[port_id]; + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->timesync_write_time, -ENOTSUP); + return (*dev->dev_ops->timesync_write_time)(dev, timestamp); +} + +int +rte_eth_dev_get_reg_length(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg_length, -ENOTSUP); + return (*dev->dev_ops->get_reg_length)(dev); +} + +int +rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_reg, -ENOTSUP); + return (*dev->dev_ops->get_reg)(dev, info); +} + +int +rte_eth_dev_get_eeprom_length(uint8_t port_id) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom_length, -ENOTSUP); + return (*dev->dev_ops->get_eeprom_length)(dev); +} + +int +rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_eeprom, -ENOTSUP); + return (*dev->dev_ops->get_eeprom)(dev, info); +} + +int +rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->set_eeprom, -ENOTSUP); + return (*dev->dev_ops->set_eeprom)(dev, info); +} + +int +rte_eth_dev_get_dcb_info(uint8_t port_id, + struct rte_eth_dcb_info *dcb_info) +{ + struct rte_eth_dev *dev; + + if (!rte_eth_dev_is_valid_port(port_id)) { + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); + return -ENODEV; + } + + dev = &rte_eth_devices[port_id]; + memset(dcb_info, 0, sizeof(struct rte_eth_dcb_info)); + + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->get_dcb_info, -ENOTSUP); + return (*dev->dev_ops->get_dcb_info)(dev, dcb_info); +} + +void +rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, struct rte_pci_device *pci_dev) +{ + if ((eth_dev == NULL) || (pci_dev == NULL)) { + RTE_PMD_DEBUG_TRACE("NULL pointer eth_dev=%p pci_dev=%p\n", + eth_dev, pci_dev); + return; + } + + eth_dev->data->dev_flags = 0; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_INTR_LSC) + eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC; + if (pci_dev->driver->drv_flags & RTE_PCI_DRV_DETACHABLE) + eth_dev->data->dev_flags |= RTE_ETH_DEV_DETACHABLE; + + eth_dev->data->kdrv = pci_dev->kdrv; + eth_dev->data->numa_node = pci_dev->numa_node; + eth_dev->data->drv_name = pci_dev->driver->name; +} + +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + if (l2_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_eth_type_conf, + -ENOTSUP); + return (*dev->dev_ops->l2_tunnel_eth_type_conf)(dev, l2_tunnel); +} + +int +rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en) +{ + struct rte_eth_dev *dev; + + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + + if (l2_tunnel == NULL) { + RTE_PMD_DEBUG_TRACE("Invalid l2_tunnel parameter\n"); + return -EINVAL; + } + + if (l2_tunnel->l2_tunnel_type >= RTE_TUNNEL_TYPE_MAX) { + RTE_PMD_DEBUG_TRACE("Invalid tunnel type.\n"); + return -EINVAL; + } + + if (mask == 0) { + RTE_PMD_DEBUG_TRACE("Mask should have a value.\n"); + return -EINVAL; + } + + dev = &rte_eth_devices[port_id]; + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->l2_tunnel_offload_set, + -ENOTSUP); + return (*dev->dev_ops->l2_tunnel_offload_set)(dev, l2_tunnel, mask, en); +} diff --git a/lib/librte_ether/rte_ethdev.h b/lib/librte_ether/rte_ethdev.h new file mode 100644 index 00000000..022733ec --- /dev/null +++ b/lib/librte_ether/rte_ethdev.h @@ -0,0 +1,4286 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHDEV_H_ +#define _RTE_ETHDEV_H_ + +/** + * @file + * + * RTE Ethernet Device API + * + * The Ethernet Device API is composed of two parts: + * + * - The application-oriented Ethernet API that includes functions to setup + * an Ethernet device (configure it, setup its RX and TX queues and start it), + * to get its MAC address, the speed and the status of its physical link, + * to receive and to transmit packets, and so on. + * + * - The driver-oriented Ethernet API that exports a function allowing + * an Ethernet Poll Mode Driver (PMD) to simultaneously register itself as + * an Ethernet device driver and as a PCI driver for a set of matching PCI + * [Ethernet] devices classes. + * + * By default, all the functions of the Ethernet Device API exported by a PMD + * are lock-free functions which assume to not be invoked in parallel on + * different logical cores to work on the same target object. For instance, + * the receive function of a PMD cannot be invoked in parallel on two logical + * cores to poll the same RX queue [of the same port]. Of course, this function + * can be invoked in parallel by different logical cores on different RX queues. + * It is the responsibility of the upper level application to enforce this rule. + * + * If needed, parallel accesses by multiple logical cores to shared queues + * shall be explicitly protected by dedicated inline lock-aware functions + * built on top of their corresponding lock-free functions of the PMD API. + * + * In all functions of the Ethernet API, the Ethernet device is + * designated by an integer >= 0 named the device port identifier. + * + * At the Ethernet driver level, Ethernet devices are represented by a generic + * data structure of type *rte_eth_dev*. + * + * Ethernet devices are dynamically registered during the PCI probing phase + * performed at EAL initialization time. + * When an Ethernet device is being probed, an *rte_eth_dev* structure and + * a new port identifier are allocated for that device. Then, the eth_dev_init() + * function supplied by the Ethernet driver matching the probed PCI + * device is invoked to properly initialize the device. + * + * The role of the device init function consists of resetting the hardware, + * checking access to Non-volatile Memory (NVM), reading the MAC address + * from NVM etc. + * + * If the device init operation is successful, the correspondence between + * the port identifier assigned to the new device and its associated + * *rte_eth_dev* structure is effectively registered. + * Otherwise, both the *rte_eth_dev* structure and the port identifier are + * freed. + * + * The functions exported by the application Ethernet API to setup a device + * designated by its port identifier must be invoked in the following order: + * - rte_eth_dev_configure() + * - rte_eth_tx_queue_setup() + * - rte_eth_rx_queue_setup() + * - rte_eth_dev_start() + * + * Then, the network application can invoke, in any order, the functions + * exported by the Ethernet API to get the MAC address of a given device, to + * get the speed and the status of a device physical link, to receive/transmit + * [burst of] packets, and so on. + * + * If the application wants to change the configuration (i.e. call + * rte_eth_dev_configure(), rte_eth_tx_queue_setup(), or + * rte_eth_rx_queue_setup()), it must call rte_eth_dev_stop() first to stop the + * device and then do the reconfiguration before calling rte_eth_dev_start() + * again. The tramsit and receive functions should not be invoked when the + * device is stopped. + * + * Please note that some configuration is not stored between calls to + * rte_eth_dev_stop()/rte_eth_dev_start(). The following configuration will + * be retained: + * + * - flow control settings + * - receive mode configuration (promiscuous mode, hardware checksum mode, + * RSS/VMDQ settings etc.) + * - VLAN filtering configuration + * - MAC addresses supplied to MAC address array + * - flow director filtering mode (but not filtering rules) + * - NIC queue statistics mappings + * + * Any other configuration will not be stored and will need to be re-entered + * after a call to rte_eth_dev_start(). + * + * Finally, a network application can close an Ethernet device by invoking the + * rte_eth_dev_close() function. + * + * Each function of the application Ethernet API invokes a specific function + * of the PMD that controls the target device designated by its port + * identifier. + * For this purpose, all device-specific functions of an Ethernet driver are + * supplied through a set of pointers contained in a generic structure of type + * *eth_dev_ops*. + * The address of the *eth_dev_ops* structure is stored in the *rte_eth_dev* + * structure by the device init function of the Ethernet driver, which is + * invoked during the PCI probing phase, as explained earlier. + * + * In other words, each function of the Ethernet API simply retrieves the + * *rte_eth_dev* structure associated with the device port identifier and + * performs an indirect invocation of the corresponding driver function + * supplied in the *eth_dev_ops* structure of the *rte_eth_dev* structure. + * + * For performance reasons, the address of the burst-oriented RX and TX + * functions of the Ethernet driver are not contained in the *eth_dev_ops* + * structure. Instead, they are directly stored at the beginning of the + * *rte_eth_dev* structure to avoid an extra indirect memory access during + * their invocation. + * + * RTE ethernet device drivers do not use interrupts for transmitting or + * receiving. Instead, Ethernet drivers export Poll-Mode receive and transmit + * functions to applications. + * Both receive and transmit functions are packet-burst oriented to minimize + * their cost per packet through the following optimizations: + * + * - Sharing among multiple packets the incompressible cost of the + * invocation of receive/transmit functions. + * + * - Enabling receive/transmit functions to take advantage of burst-oriented + * hardware features (L1 cache, prefetch instructions, NIC head/tail + * registers) to minimize the number of CPU cycles per packet, for instance, + * by avoiding useless read memory accesses to ring descriptors, or by + * systematically using arrays of pointers that exactly fit L1 cache line + * boundaries and sizes. + * + * The burst-oriented receive function does not provide any error notification, + * to avoid the corresponding overhead. As a hint, the upper-level application + * might check the status of the device link once being systematically returned + * a 0 value by the receive function of the driver for a given number of tries. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include + +/* Use this macro to check if LRO API is supported */ +#define RTE_ETHDEV_HAS_LRO_SUPPORT + +#include +#include +#include +#include +#include +#include "rte_ether.h" +#include "rte_eth_ctrl.h" +#include "rte_dev_info.h" + +struct rte_mbuf; + +/** + * A structure used to retrieve statistics for an Ethernet port. + */ +struct rte_eth_stats { + uint64_t ipackets; /**< Total number of successfully received packets. */ + uint64_t opackets; /**< Total number of successfully transmitted packets.*/ + uint64_t ibytes; /**< Total number of successfully received bytes. */ + uint64_t obytes; /**< Total number of successfully transmitted bytes. */ + uint64_t imissed; + /**< Total of RX packets dropped by the HW, + * because there are no available mbufs (i.e. RX queues are full). + */ + uint64_t ibadcrc __rte_deprecated; + /**< Deprecated; Total of RX packets with CRC error. */ + uint64_t ibadlen __rte_deprecated; + /**< Deprecated; Total of RX packets with bad length. */ + uint64_t ierrors; /**< Total number of erroneous received packets. */ + uint64_t oerrors; /**< Total number of failed transmitted packets. */ + uint64_t imcasts; + /**< Deprecated; Total number of multicast received packets. */ + uint64_t rx_nombuf; /**< Total number of RX mbuf allocation failures. */ + uint64_t fdirmatch __rte_deprecated; + /**< Deprecated; Total number of RX packets matching a filter. */ + uint64_t fdirmiss __rte_deprecated; + /**< Deprecated; Total number of RX packets not matching any filter. */ + uint64_t tx_pause_xon __rte_deprecated; + /**< Deprecated; Total nb. of XON pause frame sent. */ + uint64_t rx_pause_xon __rte_deprecated; + /**< Deprecated; Total nb. of XON pause frame received. */ + uint64_t tx_pause_xoff __rte_deprecated; + /**< Deprecated; Total nb. of XOFF pause frame sent. */ + uint64_t rx_pause_xoff __rte_deprecated; + /**< Deprecated; Total nb. of XOFF pause frame received. */ + uint64_t q_ipackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue RX packets. */ + uint64_t q_opackets[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue TX packets. */ + uint64_t q_ibytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully received queue bytes. */ + uint64_t q_obytes[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of successfully transmitted queue bytes. */ + uint64_t q_errors[RTE_ETHDEV_QUEUE_STAT_CNTRS]; + /**< Total number of queue packets received that are dropped. */ + uint64_t ilbpackets; + /**< Total number of good packets received from loopback,VF Only */ + uint64_t olbpackets; + /**< Total number of good packets transmitted to loopback,VF Only */ + uint64_t ilbbytes; + /**< Total number of good bytes received from loopback,VF Only */ + uint64_t olbbytes; + /**< Total number of good bytes transmitted to loopback,VF Only */ +}; + +/** + * Device supported speeds bitmap flags + */ +#define ETH_LINK_SPEED_AUTONEG (0 << 0) /**< Autonegotiate (all speeds) */ +#define ETH_LINK_SPEED_FIXED (1 << 0) /**< Disable autoneg (fixed speed) */ +#define ETH_LINK_SPEED_10M_HD (1 << 1) /**< 10 Mbps half-duplex */ +#define ETH_LINK_SPEED_10M (1 << 2) /**< 10 Mbps full-duplex */ +#define ETH_LINK_SPEED_100M_HD (1 << 3) /**< 100 Mbps half-duplex */ +#define ETH_LINK_SPEED_100M (1 << 4) /**< 100 Mbps full-duplex */ +#define ETH_LINK_SPEED_1G (1 << 5) /**< 1 Gbps */ +#define ETH_LINK_SPEED_2_5G (1 << 6) /**< 2.5 Gbps */ +#define ETH_LINK_SPEED_5G (1 << 7) /**< 5 Gbps */ +#define ETH_LINK_SPEED_10G (1 << 8) /**< 10 Gbps */ +#define ETH_LINK_SPEED_20G (1 << 9) /**< 20 Gbps */ +#define ETH_LINK_SPEED_25G (1 << 10) /**< 25 Gbps */ +#define ETH_LINK_SPEED_40G (1 << 11) /**< 40 Gbps */ +#define ETH_LINK_SPEED_50G (1 << 12) /**< 50 Gbps */ +#define ETH_LINK_SPEED_56G (1 << 13) /**< 56 Gbps */ +#define ETH_LINK_SPEED_100G (1 << 14) /**< 100 Gbps */ + +/** + * Ethernet numeric link speeds in Mbps + */ +#define ETH_SPEED_NUM_NONE 0 /**< Not defined */ +#define ETH_SPEED_NUM_10M 10 /**< 10 Mbps */ +#define ETH_SPEED_NUM_100M 100 /**< 100 Mbps */ +#define ETH_SPEED_NUM_1G 1000 /**< 1 Gbps */ +#define ETH_SPEED_NUM_2_5G 2500 /**< 2.5 Gbps */ +#define ETH_SPEED_NUM_5G 5000 /**< 5 Gbps */ +#define ETH_SPEED_NUM_10G 10000 /**< 10 Gbps */ +#define ETH_SPEED_NUM_20G 20000 /**< 20 Gbps */ +#define ETH_SPEED_NUM_25G 25000 /**< 25 Gbps */ +#define ETH_SPEED_NUM_40G 40000 /**< 40 Gbps */ +#define ETH_SPEED_NUM_50G 50000 /**< 50 Gbps */ +#define ETH_SPEED_NUM_56G 56000 /**< 56 Gbps */ +#define ETH_SPEED_NUM_100G 100000 /**< 100 Gbps */ + +/** + * A structure used to retrieve link-level information of an Ethernet port. + */ +struct rte_eth_link { + uint32_t link_speed; /**< ETH_SPEED_NUM_ */ + uint16_t link_duplex : 1; /**< ETH_LINK_[HALF/FULL]_DUPLEX */ + uint16_t link_autoneg : 1; /**< ETH_LINK_SPEED_[AUTONEG/FIXED] */ + uint16_t link_status : 1; /**< ETH_LINK_[DOWN/UP] */ +} __attribute__((aligned(8))); /**< aligned for atomic64 read/write */ + +/* Utility constants */ +#define ETH_LINK_HALF_DUPLEX 0 /**< Half-duplex connection. */ +#define ETH_LINK_FULL_DUPLEX 1 /**< Full-duplex connection. */ +#define ETH_LINK_DOWN 0 /**< Link is down. */ +#define ETH_LINK_UP 1 /**< Link is up. */ +#define ETH_LINK_FIXED 0 /**< No autonegotiation. */ +#define ETH_LINK_AUTONEG 1 /**< Autonegotiated. */ + +/** + * A structure used to configure the ring threshold registers of an RX/TX + * queue for an Ethernet port. + */ +struct rte_eth_thresh { + uint8_t pthresh; /**< Ring prefetch threshold. */ + uint8_t hthresh; /**< Ring host threshold. */ + uint8_t wthresh; /**< Ring writeback threshold. */ +}; + +/** + * Simple flags are used for rte_eth_conf.rxmode.mq_mode. + */ +#define ETH_MQ_RX_RSS_FLAG 0x1 +#define ETH_MQ_RX_DCB_FLAG 0x2 +#define ETH_MQ_RX_VMDQ_FLAG 0x4 + +/** + * A set of values to identify what method is to be used to route + * packets to multiple queues. + */ +enum rte_eth_rx_mq_mode { + /** None of DCB,RSS or VMDQ mode */ + ETH_MQ_RX_NONE = 0, + + /** For RX side, only RSS is on */ + ETH_MQ_RX_RSS = ETH_MQ_RX_RSS_FLAG, + /** For RX side,only DCB is on. */ + ETH_MQ_RX_DCB = ETH_MQ_RX_DCB_FLAG, + /** Both DCB and RSS enable */ + ETH_MQ_RX_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG, + + /** Only VMDQ, no RSS nor DCB */ + ETH_MQ_RX_VMDQ_ONLY = ETH_MQ_RX_VMDQ_FLAG, + /** RSS mode with VMDQ */ + ETH_MQ_RX_VMDQ_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_VMDQ_FLAG, + /** Use VMDQ+DCB to route traffic to queues */ + ETH_MQ_RX_VMDQ_DCB = ETH_MQ_RX_VMDQ_FLAG | ETH_MQ_RX_DCB_FLAG, + /** Enable both VMDQ and DCB in VMDq */ + ETH_MQ_RX_VMDQ_DCB_RSS = ETH_MQ_RX_RSS_FLAG | ETH_MQ_RX_DCB_FLAG | + ETH_MQ_RX_VMDQ_FLAG, +}; + +/** + * for rx mq mode backward compatible + */ +#define ETH_RSS ETH_MQ_RX_RSS +#define VMDQ_DCB ETH_MQ_RX_VMDQ_DCB +#define ETH_DCB_RX ETH_MQ_RX_DCB + +/** + * A set of values to identify what method is to be used to transmit + * packets using multi-TCs. + */ +enum rte_eth_tx_mq_mode { + ETH_MQ_TX_NONE = 0, /**< It is in neither DCB nor VT mode. */ + ETH_MQ_TX_DCB, /**< For TX side,only DCB is on. */ + ETH_MQ_TX_VMDQ_DCB, /**< For TX side,both DCB and VT is on. */ + ETH_MQ_TX_VMDQ_ONLY, /**< Only VT on, no DCB */ +}; + +/** + * for tx mq mode backward compatible + */ +#define ETH_DCB_NONE ETH_MQ_TX_NONE +#define ETH_VMDQ_DCB_TX ETH_MQ_TX_VMDQ_DCB +#define ETH_DCB_TX ETH_MQ_TX_DCB + +/** + * A structure used to configure the RX features of an Ethernet port. + */ +struct rte_eth_rxmode { + /** The multi-queue packet distribution mode to be used, e.g. RSS. */ + enum rte_eth_rx_mq_mode mq_mode; + uint32_t max_rx_pkt_len; /**< Only used if jumbo_frame enabled. */ + uint16_t split_hdr_size; /**< hdr buf size (header_split enabled).*/ + uint16_t header_split : 1, /**< Header Split enable. */ + hw_ip_checksum : 1, /**< IP/UDP/TCP checksum offload enable. */ + hw_vlan_filter : 1, /**< VLAN filter enable. */ + hw_vlan_strip : 1, /**< VLAN strip enable. */ + hw_vlan_extend : 1, /**< Extended VLAN enable. */ + jumbo_frame : 1, /**< Jumbo Frame Receipt enable. */ + hw_strip_crc : 1, /**< Enable CRC stripping by hardware. */ + enable_scatter : 1, /**< Enable scatter packets rx handler */ + enable_lro : 1; /**< Enable LRO */ +}; + +/** + * VLAN types to indicate if it is for single VLAN, inner VLAN or outer VLAN. + * Note that single VLAN is treated the same as inner VLAN. + */ +enum rte_vlan_type { + ETH_VLAN_TYPE_UNKNOWN = 0, + ETH_VLAN_TYPE_INNER, /**< Single VLAN, or inner VLAN. */ + ETH_VLAN_TYPE_OUTER, /**< Outer VLAN. */ + ETH_VLAN_TYPE_MAX, +}; + +/** + * A structure used to configure the Receive Side Scaling (RSS) feature + * of an Ethernet port. + * If not NULL, the *rss_key* pointer of the *rss_conf* structure points + * to an array holding the RSS key to use for hashing specific header + * fields of received packets. The length of this array should be indicated + * by *rss_key_len* below. Otherwise, a default random hash key is used by + * the device driver. + * + * The *rss_key_len* field of the *rss_conf* structure indicates the length + * in bytes of the array pointed by *rss_key*. To be compatible, this length + * will be checked in i40e only. Others assume 40 bytes to be used as before. + * + * The *rss_hf* field of the *rss_conf* structure indicates the different + * types of IPv4/IPv6 packets to which the RSS hashing must be applied. + * Supplying an *rss_hf* equal to zero disables the RSS feature. + */ +struct rte_eth_rss_conf { + uint8_t *rss_key; /**< If not NULL, 40-byte hash key. */ + uint8_t rss_key_len; /**< hash key length in bytes. */ + uint64_t rss_hf; /**< Hash functions to apply - see below. */ +}; + +/* + * The RSS offload types are defined based on flow types which are defined + * in rte_eth_ctrl.h. Different NIC hardwares may support different RSS offload + * types. The supported flow types or RSS offload types can be queried by + * rte_eth_dev_info_get(). + */ +#define ETH_RSS_IPV4 (1ULL << RTE_ETH_FLOW_IPV4) +#define ETH_RSS_FRAG_IPV4 (1ULL << RTE_ETH_FLOW_FRAG_IPV4) +#define ETH_RSS_NONFRAG_IPV4_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_TCP) +#define ETH_RSS_NONFRAG_IPV4_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_UDP) +#define ETH_RSS_NONFRAG_IPV4_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_SCTP) +#define ETH_RSS_NONFRAG_IPV4_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV4_OTHER) +#define ETH_RSS_IPV6 (1ULL << RTE_ETH_FLOW_IPV6) +#define ETH_RSS_FRAG_IPV6 (1ULL << RTE_ETH_FLOW_FRAG_IPV6) +#define ETH_RSS_NONFRAG_IPV6_TCP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_TCP) +#define ETH_RSS_NONFRAG_IPV6_UDP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_UDP) +#define ETH_RSS_NONFRAG_IPV6_SCTP (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_SCTP) +#define ETH_RSS_NONFRAG_IPV6_OTHER (1ULL << RTE_ETH_FLOW_NONFRAG_IPV6_OTHER) +#define ETH_RSS_L2_PAYLOAD (1ULL << RTE_ETH_FLOW_L2_PAYLOAD) +#define ETH_RSS_IPV6_EX (1ULL << RTE_ETH_FLOW_IPV6_EX) +#define ETH_RSS_IPV6_TCP_EX (1ULL << RTE_ETH_FLOW_IPV6_TCP_EX) +#define ETH_RSS_IPV6_UDP_EX (1ULL << RTE_ETH_FLOW_IPV6_UDP_EX) + +#define ETH_RSS_IP ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_IPV6_EX) + +#define ETH_RSS_UDP ( \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_IPV6_UDP_EX) + +#define ETH_RSS_TCP ( \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_IPV6_TCP_EX) + +#define ETH_RSS_SCTP ( \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV6_SCTP) + +/**< Mask of valid RSS hash protocols */ +#define ETH_RSS_PROTO_MASK ( \ + ETH_RSS_IPV4 | \ + ETH_RSS_FRAG_IPV4 | \ + ETH_RSS_NONFRAG_IPV4_TCP | \ + ETH_RSS_NONFRAG_IPV4_UDP | \ + ETH_RSS_NONFRAG_IPV4_SCTP | \ + ETH_RSS_NONFRAG_IPV4_OTHER | \ + ETH_RSS_IPV6 | \ + ETH_RSS_FRAG_IPV6 | \ + ETH_RSS_NONFRAG_IPV6_TCP | \ + ETH_RSS_NONFRAG_IPV6_UDP | \ + ETH_RSS_NONFRAG_IPV6_SCTP | \ + ETH_RSS_NONFRAG_IPV6_OTHER | \ + ETH_RSS_L2_PAYLOAD | \ + ETH_RSS_IPV6_EX | \ + ETH_RSS_IPV6_TCP_EX | \ + ETH_RSS_IPV6_UDP_EX) + +/* + * Definitions used for redirection table entry size. + * Some RSS RETA sizes may not be supported by some drivers, check the + * documentation or the description of relevant functions for more details. + */ +#define ETH_RSS_RETA_SIZE_64 64 +#define ETH_RSS_RETA_SIZE_128 128 +#define ETH_RSS_RETA_SIZE_512 512 +#define RTE_RETA_GROUP_SIZE 64 + +/* Definitions used for VMDQ and DCB functionality */ +#define ETH_VMDQ_MAX_VLAN_FILTERS 64 /**< Maximum nb. of VMDQ vlan filters. */ +#define ETH_DCB_NUM_USER_PRIORITIES 8 /**< Maximum nb. of DCB priorities. */ +#define ETH_VMDQ_DCB_NUM_QUEUES 128 /**< Maximum nb. of VMDQ DCB queues. */ +#define ETH_DCB_NUM_QUEUES 128 /**< Maximum nb. of DCB queues. */ + +/* DCB capability defines */ +#define ETH_DCB_PG_SUPPORT 0x00000001 /**< Priority Group(ETS) support. */ +#define ETH_DCB_PFC_SUPPORT 0x00000002 /**< Priority Flow Control support. */ + +/* Definitions used for VLAN Offload functionality */ +#define ETH_VLAN_STRIP_OFFLOAD 0x0001 /**< VLAN Strip On/Off */ +#define ETH_VLAN_FILTER_OFFLOAD 0x0002 /**< VLAN Filter On/Off */ +#define ETH_VLAN_EXTEND_OFFLOAD 0x0004 /**< VLAN Extend On/Off */ + +/* Definitions used for mask VLAN setting */ +#define ETH_VLAN_STRIP_MASK 0x0001 /**< VLAN Strip setting mask */ +#define ETH_VLAN_FILTER_MASK 0x0002 /**< VLAN Filter setting mask*/ +#define ETH_VLAN_EXTEND_MASK 0x0004 /**< VLAN Extend setting mask*/ +#define ETH_VLAN_ID_MAX 0x0FFF /**< VLAN ID is in lower 12 bits*/ + +/* Definitions used for receive MAC address */ +#define ETH_NUM_RECEIVE_MAC_ADDR 128 /**< Maximum nb. of receive mac addr. */ + +/* Definitions used for unicast hash */ +#define ETH_VMDQ_NUM_UC_HASH_ARRAY 128 /**< Maximum nb. of UC hash array. */ + +/* Definitions used for VMDQ pool rx mode setting */ +#define ETH_VMDQ_ACCEPT_UNTAG 0x0001 /**< accept untagged packets. */ +#define ETH_VMDQ_ACCEPT_HASH_MC 0x0002 /**< accept packets in multicast table . */ +#define ETH_VMDQ_ACCEPT_HASH_UC 0x0004 /**< accept packets in unicast table. */ +#define ETH_VMDQ_ACCEPT_BROADCAST 0x0008 /**< accept broadcast packets. */ +#define ETH_VMDQ_ACCEPT_MULTICAST 0x0010 /**< multicast promiscuous. */ + +/** Maximum nb. of vlan per mirror rule */ +#define ETH_MIRROR_MAX_VLANS 64 + +#define ETH_MIRROR_VIRTUAL_POOL_UP 0x01 /**< Virtual Pool uplink Mirroring. */ +#define ETH_MIRROR_UPLINK_PORT 0x02 /**< Uplink Port Mirroring. */ +#define ETH_MIRROR_DOWNLINK_PORT 0x04 /**< Downlink Port Mirroring. */ +#define ETH_MIRROR_VLAN 0x08 /**< VLAN Mirroring. */ +#define ETH_MIRROR_VIRTUAL_POOL_DOWN 0x10 /**< Virtual Pool downlink Mirroring. */ + +/** + * A structure used to configure VLAN traffic mirror of an Ethernet port. + */ +struct rte_eth_vlan_mirror { + uint64_t vlan_mask; /**< mask for valid VLAN ID. */ + /** VLAN ID list for vlan mirroring. */ + uint16_t vlan_id[ETH_MIRROR_MAX_VLANS]; +}; + +/** + * A structure used to configure traffic mirror of an Ethernet port. + */ +struct rte_eth_mirror_conf { + uint8_t rule_type; /**< Mirroring rule type */ + uint8_t dst_pool; /**< Destination pool for this mirror rule. */ + uint64_t pool_mask; /**< Bitmap of pool for pool mirroring */ + /** VLAN ID setting for VLAN mirroring. */ + struct rte_eth_vlan_mirror vlan; +}; + +/** + * A structure used to configure 64 entries of Redirection Table of the + * Receive Side Scaling (RSS) feature of an Ethernet port. To configure + * more than 64 entries supported by hardware, an array of this structure + * is needed. + */ +struct rte_eth_rss_reta_entry64 { + uint64_t mask; + /**< Mask bits indicate which entries need to be updated/queried. */ + uint16_t reta[RTE_RETA_GROUP_SIZE]; + /**< Group of 64 redirection table entries. */ +}; + +/** + * This enum indicates the possible number of traffic classes + * in DCB configratioins + */ +enum rte_eth_nb_tcs { + ETH_4_TCS = 4, /**< 4 TCs with DCB. */ + ETH_8_TCS = 8 /**< 8 TCs with DCB. */ +}; + +/** + * This enum indicates the possible number of queue pools + * in VMDQ configurations. + */ +enum rte_eth_nb_pools { + ETH_8_POOLS = 8, /**< 8 VMDq pools. */ + ETH_16_POOLS = 16, /**< 16 VMDq pools. */ + ETH_32_POOLS = 32, /**< 32 VMDq pools. */ + ETH_64_POOLS = 64 /**< 64 VMDq pools. */ +}; + +/* This structure may be extended in future. */ +struct rte_eth_dcb_rx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_dcb_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_dcb_tx_conf { + enum rte_eth_nb_tcs nb_tcs; /**< Possible DCB TCs, 4 or 8 TCs. */ + /** Traffic class each UP mapped to. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; +}; + +struct rte_eth_vmdq_tx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq mode, 64 pools. */ +}; + +/** + * A structure used to configure the VMDQ+DCB feature + * of an Ethernet port. + * + * Using this feature, packets are routed to a pool of queues, based + * on the vlan id in the vlan tag, and then to a specific queue within + * that pool, using the user priority vlan tag field. + * + * A default pool may be used, if desired, to route all traffic which + * does not match the vlan filter rules. + */ +struct rte_eth_vmdq_dcb_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< With DCB, 16 or 32 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ + uint8_t dcb_tc[ETH_DCB_NUM_USER_PRIORITIES]; + /**< Selects a queue in a pool */ +}; + +struct rte_eth_vmdq_rx_conf { + enum rte_eth_nb_pools nb_queue_pools; /**< VMDq only mode, 8 or 64 pools */ + uint8_t enable_default_pool; /**< If non-zero, use a default pool */ + uint8_t default_pool; /**< The default pool, if applicable */ + uint8_t enable_loop_back; /**< Enable VT loop back */ + uint8_t nb_pool_maps; /**< We can have up to 64 filters/mappings */ + uint32_t rx_mode; /**< Flags from ETH_VMDQ_ACCEPT_* */ + struct { + uint16_t vlan_id; /**< The vlan id of the received frame */ + uint64_t pools; /**< Bitmask of pools for packet rx */ + } pool_map[ETH_VMDQ_MAX_VLAN_FILTERS]; /**< VMDq vlan pool maps. */ +}; + +/** + * A structure used to configure the TX features of an Ethernet port. + */ +struct rte_eth_txmode { + enum rte_eth_tx_mq_mode mq_mode; /**< TX multi-queues mode. */ + + /* For i40e specifically */ + uint16_t pvid; + uint8_t hw_vlan_reject_tagged : 1, + /**< If set, reject sending out tagged pkts */ + hw_vlan_reject_untagged : 1, + /**< If set, reject sending out untagged pkts */ + hw_vlan_insert_pvid : 1; + /**< If set, enable port based VLAN insertion */ +}; + +/** + * A structure used to configure an RX ring of an Ethernet port. + */ +struct rte_eth_rxconf { + struct rte_eth_thresh rx_thresh; /**< RX ring threshold registers. */ + uint16_t rx_free_thresh; /**< Drives the freeing of RX descriptors. */ + uint8_t rx_drop_en; /**< Drop packets if no descriptors are available. */ + uint8_t rx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ +}; + +#define ETH_TXQ_FLAGS_NOMULTSEGS 0x0001 /**< nb_segs=1 for all mbufs */ +#define ETH_TXQ_FLAGS_NOREFCOUNT 0x0002 /**< refcnt can be ignored */ +#define ETH_TXQ_FLAGS_NOMULTMEMP 0x0004 /**< all bufs come from same mempool */ +#define ETH_TXQ_FLAGS_NOVLANOFFL 0x0100 /**< disable VLAN offload */ +#define ETH_TXQ_FLAGS_NOXSUMSCTP 0x0200 /**< disable SCTP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMUDP 0x0400 /**< disable UDP checksum offload */ +#define ETH_TXQ_FLAGS_NOXSUMTCP 0x0800 /**< disable TCP checksum offload */ +#define ETH_TXQ_FLAGS_NOOFFLOADS \ + (ETH_TXQ_FLAGS_NOVLANOFFL | ETH_TXQ_FLAGS_NOXSUMSCTP | \ + ETH_TXQ_FLAGS_NOXSUMUDP | ETH_TXQ_FLAGS_NOXSUMTCP) +#define ETH_TXQ_FLAGS_NOXSUMS \ + (ETH_TXQ_FLAGS_NOXSUMSCTP | ETH_TXQ_FLAGS_NOXSUMUDP | \ + ETH_TXQ_FLAGS_NOXSUMTCP) +/** + * A structure used to configure a TX ring of an Ethernet port. + */ +struct rte_eth_txconf { + struct rte_eth_thresh tx_thresh; /**< TX ring threshold registers. */ + uint16_t tx_rs_thresh; /**< Drives the setting of RS bit on TXDs. */ + uint16_t tx_free_thresh; /**< Start freeing TX buffers if there are + less free descriptors than this value. */ + + uint32_t txq_flags; /**< Set flags for the Tx queue */ + uint8_t tx_deferred_start; /**< Do not start queue with rte_eth_dev_start(). */ +}; + +/** + * A structure contains information about HW descriptor ring limitations. + */ +struct rte_eth_desc_lim { + uint16_t nb_max; /**< Max allowed number of descriptors. */ + uint16_t nb_min; /**< Min allowed number of descriptors. */ + uint16_t nb_align; /**< Number of descriptors should be aligned to. */ +}; + +/** + * This enum indicates the flow control mode + */ +enum rte_eth_fc_mode { + RTE_FC_NONE = 0, /**< Disable flow control. */ + RTE_FC_RX_PAUSE, /**< RX pause frame, enable flowctrl on TX side. */ + RTE_FC_TX_PAUSE, /**< TX pause frame, enable flowctrl on RX side. */ + RTE_FC_FULL /**< Enable flow control on both side. */ +}; + +/** + * A structure used to configure Ethernet flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_fc_conf { + uint32_t high_water; /**< High threshold value to trigger XOFF */ + uint32_t low_water; /**< Low threshold value to trigger XON */ + uint16_t pause_time; /**< Pause quota in the Pause frame */ + uint16_t send_xon; /**< Is XON frame need be sent */ + enum rte_eth_fc_mode mode; /**< Link flow control mode */ + uint8_t mac_ctrl_frame_fwd; /**< Forward MAC control frames */ + uint8_t autoneg; /**< Use Pause autoneg */ +}; + +/** + * A structure used to configure Ethernet priority flow control parameter. + * These parameters will be configured into the register of the NIC. + * Please refer to the corresponding data sheet for proper value. + */ +struct rte_eth_pfc_conf { + struct rte_eth_fc_conf fc; /**< General flow control parameter. */ + uint8_t priority; /**< VLAN User Priority. */ +}; + +/** + * Memory space that can be configured to store Flow Director filters + * in the board memory. + */ +enum rte_fdir_pballoc_type { + RTE_FDIR_PBALLOC_64K = 0, /**< 64k. */ + RTE_FDIR_PBALLOC_128K, /**< 128k. */ + RTE_FDIR_PBALLOC_256K, /**< 256k. */ +}; + +/** + * Select report mode of FDIR hash information in RX descriptors. + */ +enum rte_fdir_status_mode { + RTE_FDIR_NO_REPORT_STATUS = 0, /**< Never report FDIR hash. */ + RTE_FDIR_REPORT_STATUS, /**< Only report FDIR hash for matching pkts. */ + RTE_FDIR_REPORT_STATUS_ALWAYS, /**< Always report FDIR hash. */ +}; + +/** + * A structure used to configure the Flow Director (FDIR) feature + * of an Ethernet port. + * + * If mode is RTE_FDIR_DISABLE, the pballoc value is ignored. + */ +struct rte_fdir_conf { + enum rte_fdir_mode mode; /**< Flow Director mode. */ + enum rte_fdir_pballoc_type pballoc; /**< Space for FDIR filters. */ + enum rte_fdir_status_mode status; /**< How to report FDIR hash. */ + /** RX queue of packets matching a "drop" filter in perfect mode. */ + uint8_t drop_queue; + struct rte_eth_fdir_masks mask; + struct rte_eth_fdir_flex_conf flex_conf; + /**< Flex payload configuration. */ +}; + +/** + * UDP tunneling configuration. + * Used to config the UDP port for a type of tunnel. + * NICs need the UDP port to identify the tunnel type. + * Normally a type of tunnel has a default UDP port, this structure can be used + * in case if the users want to change or support more UDP port. + */ +struct rte_eth_udp_tunnel { + uint16_t udp_port; /**< UDP port used for the tunnel. */ + uint8_t prot_type; /**< Tunnel type. Defined in rte_eth_tunnel_type. */ +}; + +/** + * A structure used to enable/disable specific device interrupts. + */ +struct rte_intr_conf { + /** enable/disable lsc interrupt. 0 (default) - disable, 1 enable */ + uint16_t lsc; + /** enable/disable rxq interrupt. 0 (default) - disable, 1 enable */ + uint16_t rxq; +}; + +/** + * A structure used to configure an Ethernet port. + * Depending upon the RX multi-queue mode, extra advanced + * configuration settings may be needed. + */ +struct rte_eth_conf { + uint32_t link_speeds; /**< bitmap of ETH_LINK_SPEED_XXX of speeds to be + used. ETH_LINK_SPEED_FIXED disables link + autonegotiation, and a unique speed shall be + set. Otherwise, the bitmap defines the set of + speeds to be advertised. If the special value + ETH_LINK_SPEED_AUTONEG (0) is used, all speeds + supported are advertised. */ + struct rte_eth_rxmode rxmode; /**< Port RX configuration. */ + struct rte_eth_txmode txmode; /**< Port TX configuration. */ + uint32_t lpbk_mode; /**< Loopback operation mode. By default the value + is 0, meaning the loopback mode is disabled. + Read the datasheet of given ethernet controller + for details. The possible values of this field + are defined in implementation of each driver. */ + struct { + struct rte_eth_rss_conf rss_conf; /**< Port RSS configuration */ + struct rte_eth_vmdq_dcb_conf vmdq_dcb_conf; + /**< Port vmdq+dcb configuration. */ + struct rte_eth_dcb_rx_conf dcb_rx_conf; + /**< Port dcb RX configuration. */ + struct rte_eth_vmdq_rx_conf vmdq_rx_conf; + /**< Port vmdq RX configuration. */ + } rx_adv_conf; /**< Port RX filtering configuration (union). */ + union { + struct rte_eth_vmdq_dcb_tx_conf vmdq_dcb_tx_conf; + /**< Port vmdq+dcb TX configuration. */ + struct rte_eth_dcb_tx_conf dcb_tx_conf; + /**< Port dcb TX configuration. */ + struct rte_eth_vmdq_tx_conf vmdq_tx_conf; + /**< Port vmdq TX configuration. */ + } tx_adv_conf; /**< Port TX DCB configuration (union). */ + /** Currently,Priority Flow Control(PFC) are supported,if DCB with PFC + is needed,and the variable must be set ETH_DCB_PFC_SUPPORT. */ + uint32_t dcb_capability_en; + struct rte_fdir_conf fdir_conf; /**< FDIR configuration. */ + struct rte_intr_conf intr_conf; /**< Interrupt mode configuration. */ +}; + +/** + * A structure used to retrieve the contextual information of + * an Ethernet device, such as the controlling driver of the device, + * its PCI context, etc... + */ + +/** + * RX offload capabilities of a device. + */ +#define DEV_RX_OFFLOAD_VLAN_STRIP 0x00000001 +#define DEV_RX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_RX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_RX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_RX_OFFLOAD_TCP_LRO 0x00000010 +#define DEV_RX_OFFLOAD_QINQ_STRIP 0x00000020 +#define DEV_RX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000040 + +/** + * TX offload capabilities of a device. + */ +#define DEV_TX_OFFLOAD_VLAN_INSERT 0x00000001 +#define DEV_TX_OFFLOAD_IPV4_CKSUM 0x00000002 +#define DEV_TX_OFFLOAD_UDP_CKSUM 0x00000004 +#define DEV_TX_OFFLOAD_TCP_CKSUM 0x00000008 +#define DEV_TX_OFFLOAD_SCTP_CKSUM 0x00000010 +#define DEV_TX_OFFLOAD_TCP_TSO 0x00000020 +#define DEV_TX_OFFLOAD_UDP_TSO 0x00000040 +#define DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM 0x00000080 /**< Used for tunneling packet. */ +#define DEV_TX_OFFLOAD_QINQ_INSERT 0x00000100 + +/** + * Ethernet device information + */ +struct rte_eth_dev_info { + struct rte_pci_device *pci_dev; /**< Device PCI information. */ + const char *driver_name; /**< Device Driver name. */ + unsigned int if_index; /**< Index to bound host interface, or 0 if none. + Use if_indextoname() to translate into an interface name. */ + uint32_t min_rx_bufsize; /**< Minimum size of RX buffer. */ + uint32_t max_rx_pktlen; /**< Maximum configurable length of RX pkt. */ + uint16_t max_rx_queues; /**< Maximum number of RX queues. */ + uint16_t max_tx_queues; /**< Maximum number of TX queues. */ + uint32_t max_mac_addrs; /**< Maximum number of MAC addresses. */ + uint32_t max_hash_mac_addrs; + /** Maximum number of hash MAC addresses for MTA and UTA. */ + uint16_t max_vfs; /**< Maximum number of VFs. */ + uint16_t max_vmdq_pools; /**< Maximum number of VMDq pools. */ + uint32_t rx_offload_capa; /**< Device RX offload capabilities. */ + uint32_t tx_offload_capa; /**< Device TX offload capabilities. */ + uint16_t reta_size; + /**< Device redirection table size, the total number of entries. */ + uint8_t hash_key_size; /**< Hash key size in bytes */ + /** Bit mask of RSS offloads, the bit offset also means flow type */ + uint64_t flow_type_rss_offloads; + struct rte_eth_rxconf default_rxconf; /**< Default RX configuration */ + struct rte_eth_txconf default_txconf; /**< Default TX configuration */ + uint16_t vmdq_queue_base; /**< First queue ID for VMDQ pools. */ + uint16_t vmdq_queue_num; /**< Queue number for VMDQ pools. */ + uint16_t vmdq_pool_base; /**< First ID of VMDQ pools. */ + struct rte_eth_desc_lim rx_desc_lim; /**< RX descriptors limits */ + struct rte_eth_desc_lim tx_desc_lim; /**< TX descriptors limits */ + uint32_t speed_capa; /**< Supported speeds bitmap (ETH_LINK_SPEED_). */ +}; + +/** + * Ethernet device RX queue information structure. + * Used to retieve information about configured queue. + */ +struct rte_eth_rxq_info { + struct rte_mempool *mp; /**< mempool used by that queue. */ + struct rte_eth_rxconf conf; /**< queue config parameters. */ + uint8_t scattered_rx; /**< scattered packets RX supported. */ + uint16_t nb_desc; /**< configured number of RXDs. */ +} __rte_cache_min_aligned; + +/** + * Ethernet device TX queue information structure. + * Used to retieve information about configured queue. + */ +struct rte_eth_txq_info { + struct rte_eth_txconf conf; /**< queue config parameters. */ + uint16_t nb_desc; /**< configured number of TXDs. */ +} __rte_cache_min_aligned; + +/** Maximum name length for extended statistics counters */ +#define RTE_ETH_XSTATS_NAME_SIZE 64 + +/** + * An Ethernet device extended statistic structure + * + * This structure is used by ethdev->eth_xstats_get() to provide + * statistics that are not provided in the generic rte_eth_stats + * structure. + */ +struct rte_eth_xstats { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + uint64_t value; +}; + +#define ETH_DCB_NUM_TCS 8 +#define ETH_MAX_VMDQ_POOL 64 + +/** + * A structure used to get the information of queue and + * TC mapping on both TX and RX paths. + */ +struct rte_eth_dcb_tc_queue_mapping { + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_rxq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; + /** rx queues assigned to tc per Pool */ + struct { + uint8_t base; + uint8_t nb_queue; + } tc_txq[ETH_MAX_VMDQ_POOL][ETH_DCB_NUM_TCS]; +}; + +/** + * A structure used to get the information of DCB. + * It includes TC UP mapping and queue TC mapping. + */ +struct rte_eth_dcb_info { + uint8_t nb_tcs; /**< number of TCs */ + uint8_t prio_tc[ETH_DCB_NUM_USER_PRIORITIES]; /**< Priority to tc */ + uint8_t tc_bws[ETH_DCB_NUM_TCS]; /**< TX BW percentage for each TC */ + /** rx queues assigned to tc */ + struct rte_eth_dcb_tc_queue_mapping tc_queue; +}; + +/** + * RX/TX queue states + */ +#define RTE_ETH_QUEUE_STATE_STOPPED 0 +#define RTE_ETH_QUEUE_STATE_STARTED 1 + +struct rte_eth_dev; + +struct rte_eth_dev_callback; +/** @internal Structure to keep track of registered callbacks */ +TAILQ_HEAD(rte_eth_dev_cb_list, rte_eth_dev_callback); + + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG +#define RTE_PMD_DEBUG_TRACE(...) \ + rte_pmd_debug_trace(__func__, __VA_ARGS__) +#else +#define RTE_PMD_DEBUG_TRACE(...) +#endif + + +/* Macros to check for valid port */ +#define RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, retval) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return retval; \ + } \ +} while (0) + +#define RTE_ETH_VALID_PORTID_OR_RET(port_id) do { \ + if (!rte_eth_dev_is_valid_port(port_id)) { \ + RTE_PMD_DEBUG_TRACE("Invalid port_id=%d\n", port_id); \ + return; \ + } \ +} while (0) + +/** + * l2 tunnel configuration. + */ + +/**< l2 tunnel enable mask */ +#define ETH_L2_TUNNEL_ENABLE_MASK 0x00000001 +/**< l2 tunnel insertion mask */ +#define ETH_L2_TUNNEL_INSERTION_MASK 0x00000002 +/**< l2 tunnel stripping mask */ +#define ETH_L2_TUNNEL_STRIPPING_MASK 0x00000004 +/**< l2 tunnel forwarding mask */ +#define ETH_L2_TUNNEL_FORWARDING_MASK 0x00000008 + +/* + * Definitions of all functions exported by an Ethernet driver through the + * the generic structure of type *eth_dev_ops* supplied in the *rte_eth_dev* + * structure associated with an Ethernet device. + */ + +typedef int (*eth_dev_configure_t)(struct rte_eth_dev *dev); +/**< @internal Ethernet device configuration. */ + +typedef int (*eth_dev_start_t)(struct rte_eth_dev *dev); +/**< @internal Function used to start a configured Ethernet device. */ + +typedef void (*eth_dev_stop_t)(struct rte_eth_dev *dev); +/**< @internal Function used to stop a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_up_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link up a configured Ethernet device. */ + +typedef int (*eth_dev_set_link_down_t)(struct rte_eth_dev *dev); +/**< @internal Function used to link down a configured Ethernet device. */ + +typedef void (*eth_dev_close_t)(struct rte_eth_dev *dev); +/**< @internal Function used to close a configured Ethernet device. */ + +typedef void (*eth_promiscuous_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_promiscuous_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable the RX promiscuous mode of an Ethernet device. */ + +typedef void (*eth_allmulticast_enable_t)(struct rte_eth_dev *dev); +/**< @internal Enable the receipt of all multicast packets by an Ethernet device. */ + +typedef void (*eth_allmulticast_disable_t)(struct rte_eth_dev *dev); +/**< @internal Disable the receipt of all multicast packets by an Ethernet device. */ + +typedef int (*eth_link_update_t)(struct rte_eth_dev *dev, + int wait_to_complete); +/**< @internal Get link speed, duplex mode and state (up/down) of an Ethernet device. */ + +typedef void (*eth_stats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_stats *igb_stats); +/**< @internal Get global I/O statistics of an Ethernet device. */ + +typedef void (*eth_stats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset global I/O statistics of an Ethernet device to 0. */ + +typedef int (*eth_xstats_get_t)(struct rte_eth_dev *dev, + struct rte_eth_xstats *stats, unsigned n); +/**< @internal Get extended stats of an Ethernet device. */ + +typedef void (*eth_xstats_reset_t)(struct rte_eth_dev *dev); +/**< @internal Reset extended stats of an Ethernet device. */ + +typedef int (*eth_queue_stats_mapping_set_t)(struct rte_eth_dev *dev, + uint16_t queue_id, + uint8_t stat_idx, + uint8_t is_rx); +/**< @internal Set a queue statistics mapping for a tx/rx queue of an Ethernet device. */ + +typedef void (*eth_dev_infos_get_t)(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info); +/**< @internal Get specific informations of an Ethernet device. */ + +typedef const uint32_t *(*eth_dev_supported_ptypes_get_t)(struct rte_eth_dev *dev); +/**< @internal Get supported ptypes of an Ethernet device. */ + +typedef int (*eth_queue_start_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Start rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_queue_stop_t)(struct rte_eth_dev *dev, + uint16_t queue_id); +/**< @internal Stop rx and tx of a queue of an Ethernet device. */ + +typedef int (*eth_rx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + uint16_t nb_rx_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); +/**< @internal Set up a receive queue of an Ethernet device. */ + +typedef int (*eth_tx_queue_setup_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, + uint16_t nb_tx_desc, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +/**< @internal Setup a transmit queue of an Ethernet device. */ + +typedef int (*eth_rx_enable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Enable interrupt of a receive queue of an Ethernet device. */ + +typedef int (*eth_rx_disable_intr_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Disable interrupt of a receive queue of an Ethernet device. */ + +typedef void (*eth_queue_release_t)(void *queue); +/**< @internal Release memory resources allocated by given RX/TX queue. */ + +typedef uint32_t (*eth_rx_queue_count_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id); +/**< @internal Get number of available descriptors on a receive queue of an Ethernet device. */ + +typedef int (*eth_rx_descriptor_done_t)(void *rxq, uint16_t offset); +/**< @internal Check DD bit of specific RX descriptor */ + +typedef void (*eth_rxq_info_get_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, struct rte_eth_rxq_info *qinfo); + +typedef void (*eth_txq_info_get_t)(struct rte_eth_dev *dev, + uint16_t tx_queue_id, struct rte_eth_txq_info *qinfo); + +typedef int (*mtu_set_t)(struct rte_eth_dev *dev, uint16_t mtu); +/**< @internal Set MTU. */ + +typedef int (*vlan_filter_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal filtering of a VLAN Tag Identifier by an Ethernet device. */ + +typedef int (*vlan_tpid_set_t)(struct rte_eth_dev *dev, + enum rte_vlan_type type, uint16_t tpid); +/**< @internal set the outer VLAN-TPID by an Ethernet device. */ + +typedef void (*vlan_offload_set_t)(struct rte_eth_dev *dev, int mask); +/**< @internal set VLAN offload function by an Ethernet device. */ + +typedef int (*vlan_pvid_set_t)(struct rte_eth_dev *dev, + uint16_t vlan_id, + int on); +/**< @internal set port based TX VLAN insertion by an Ethernet device. */ + +typedef void (*vlan_strip_queue_set_t)(struct rte_eth_dev *dev, + uint16_t rx_queue_id, + int on); +/**< @internal VLAN stripping enable/disable by an queue of Ethernet device. */ + +typedef uint16_t (*eth_rx_burst_t)(void *rxq, + struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); +/**< @internal Retrieve input packets from a receive queue of an Ethernet device. */ + +typedef uint16_t (*eth_tx_burst_t)(void *txq, + struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +/**< @internal Send output packets on a transmit queue of an Ethernet device. */ + +typedef int (*flow_ctrl_get_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Get current flow control parameter on an Ethernet device */ + +typedef int (*flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_fc_conf *fc_conf); +/**< @internal Setup flow control parameter on an Ethernet device */ + +typedef int (*priority_flow_ctrl_set_t)(struct rte_eth_dev *dev, + struct rte_eth_pfc_conf *pfc_conf); +/**< @internal Setup priority flow control parameter on an Ethernet device */ + +typedef int (*reta_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Update RSS redirection table on an Ethernet device */ + +typedef int (*reta_query_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); +/**< @internal Query RSS redirection table on an Ethernet device */ + +typedef int (*rss_hash_update_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Update RSS hash configuration of an Ethernet device */ + +typedef int (*rss_hash_conf_get_t)(struct rte_eth_dev *dev, + struct rte_eth_rss_conf *rss_conf); +/**< @internal Get current RSS hash configuration of an Ethernet device */ + +typedef int (*eth_dev_led_on_t)(struct rte_eth_dev *dev); +/**< @internal Turn on SW controllable LED on an Ethernet device */ + +typedef int (*eth_dev_led_off_t)(struct rte_eth_dev *dev); +/**< @internal Turn off SW controllable LED on an Ethernet device */ + +typedef void (*eth_mac_addr_remove_t)(struct rte_eth_dev *dev, uint32_t index); +/**< @internal Remove MAC address from receive address register */ + +typedef void (*eth_mac_addr_add_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint32_t index, + uint32_t vmdq); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef void (*eth_mac_addr_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr); +/**< @internal Set a MAC address into Receive Address Address Register */ + +typedef int (*eth_uc_hash_table_set_t)(struct rte_eth_dev *dev, + struct ether_addr *mac_addr, + uint8_t on); +/**< @internal Set a Unicast Hash bitmap */ + +typedef int (*eth_uc_all_hash_table_set_t)(struct rte_eth_dev *dev, + uint8_t on); +/**< @internal Set all Unicast Hash bitmap */ + +typedef int (*eth_set_vf_rx_mode_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint16_t rx_mode, + uint8_t on); +/**< @internal Set a VF receive mode */ + +typedef int (*eth_set_vf_rx_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint8_t on); +/**< @internal Set a VF receive mode */ + +typedef int (*eth_set_vf_tx_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint8_t on); +/**< @internal Enable or disable a VF transmit */ + +typedef int (*eth_set_vf_vlan_filter_t)(struct rte_eth_dev *dev, + uint16_t vlan, + uint64_t vf_mask, + uint8_t vlan_on); +/**< @internal Set VF VLAN pool filter */ + +typedef int (*eth_set_queue_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t queue_idx, + uint16_t tx_rate); +/**< @internal Set queue TX rate */ + +typedef int (*eth_set_vf_rate_limit_t)(struct rte_eth_dev *dev, + uint16_t vf, + uint16_t tx_rate, + uint64_t q_msk); +/**< @internal Set VF TX rate */ + +typedef int (*eth_mirror_rule_set_t)(struct rte_eth_dev *dev, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); +/**< @internal Add a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_mirror_rule_reset_t)(struct rte_eth_dev *dev, + uint8_t rule_id); +/**< @internal Remove a traffic mirroring rule on an Ethernet device */ + +typedef int (*eth_udp_tunnel_port_add_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Add tunneling UDP port */ + +typedef int (*eth_udp_tunnel_port_del_t)(struct rte_eth_dev *dev, + struct rte_eth_udp_tunnel *tunnel_udp); +/**< @internal Delete tunneling UDP port */ + +typedef int (*eth_set_mc_addr_list_t)(struct rte_eth_dev *dev, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); +/**< @internal set the list of multicast addresses on an Ethernet device */ + +typedef int (*eth_timesync_enable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to enable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_disable_t)(struct rte_eth_dev *dev); +/**< @internal Function used to disable IEEE1588/802.1AS timestamping. */ + +typedef int (*eth_timesync_read_rx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp, + uint32_t flags); +/**< @internal Function used to read an RX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_read_tx_timestamp_t)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to read a TX IEEE1588/802.1AS timestamp. */ + +typedef int (*eth_timesync_adjust_time)(struct rte_eth_dev *dev, int64_t); +/**< @internal Function used to adjust the device clock */ + +typedef int (*eth_timesync_read_time)(struct rte_eth_dev *dev, + struct timespec *timestamp); +/**< @internal Function used to get time from the device clock. */ + +typedef int (*eth_timesync_write_time)(struct rte_eth_dev *dev, + const struct timespec *timestamp); +/**< @internal Function used to get time from the device clock */ + +typedef int (*eth_get_reg_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve device register count */ + +typedef int (*eth_get_reg_t)(struct rte_eth_dev *dev, + struct rte_dev_reg_info *info); +/**< @internal Retrieve registers */ + +typedef int (*eth_get_eeprom_length_t)(struct rte_eth_dev *dev); +/**< @internal Retrieve eeprom size */ + +typedef int (*eth_get_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Retrieve eeprom data */ + +typedef int (*eth_set_eeprom_t)(struct rte_eth_dev *dev, + struct rte_dev_eeprom_info *info); +/**< @internal Program eeprom data */ + +typedef int (*eth_l2_tunnel_eth_type_conf_t) + (struct rte_eth_dev *dev, struct rte_eth_l2_tunnel_conf *l2_tunnel); +/**< @internal config l2 tunnel ether type */ + +typedef int (*eth_l2_tunnel_offload_set_t) + (struct rte_eth_dev *dev, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); +/**< @internal enable/disable the l2 tunnel offload functions */ + +#ifdef RTE_NIC_BYPASS + +enum { + RTE_BYPASS_MODE_NONE, + RTE_BYPASS_MODE_NORMAL, + RTE_BYPASS_MODE_BYPASS, + RTE_BYPASS_MODE_ISOLATE, + RTE_BYPASS_MODE_NUM, +}; + +#define RTE_BYPASS_MODE_VALID(x) \ + ((x) > RTE_BYPASS_MODE_NONE && (x) < RTE_BYPASS_MODE_NUM) + +enum { + RTE_BYPASS_EVENT_NONE, + RTE_BYPASS_EVENT_START, + RTE_BYPASS_EVENT_OS_ON = RTE_BYPASS_EVENT_START, + RTE_BYPASS_EVENT_POWER_ON, + RTE_BYPASS_EVENT_OS_OFF, + RTE_BYPASS_EVENT_POWER_OFF, + RTE_BYPASS_EVENT_TIMEOUT, + RTE_BYPASS_EVENT_NUM +}; + +#define RTE_BYPASS_EVENT_VALID(x) \ + ((x) > RTE_BYPASS_EVENT_NONE && (x) < RTE_BYPASS_MODE_NUM) + +enum { + RTE_BYPASS_TMT_OFF, /* timeout disabled. */ + RTE_BYPASS_TMT_1_5_SEC, /* timeout for 1.5 seconds */ + RTE_BYPASS_TMT_2_SEC, /* timeout for 2 seconds */ + RTE_BYPASS_TMT_3_SEC, /* timeout for 3 seconds */ + RTE_BYPASS_TMT_4_SEC, /* timeout for 4 seconds */ + RTE_BYPASS_TMT_8_SEC, /* timeout for 8 seconds */ + RTE_BYPASS_TMT_16_SEC, /* timeout for 16 seconds */ + RTE_BYPASS_TMT_32_SEC, /* timeout for 32 seconds */ + RTE_BYPASS_TMT_NUM +}; + +#define RTE_BYPASS_TMT_VALID(x) \ + ((x) == RTE_BYPASS_TMT_OFF || \ + ((x) > RTE_BYPASS_TMT_OFF && (x) < RTE_BYPASS_TMT_NUM)) + +typedef void (*bypass_init_t)(struct rte_eth_dev *dev); +typedef int32_t (*bypass_state_set_t)(struct rte_eth_dev *dev, uint32_t *new_state); +typedef int32_t (*bypass_state_show_t)(struct rte_eth_dev *dev, uint32_t *state); +typedef int32_t (*bypass_event_set_t)(struct rte_eth_dev *dev, uint32_t state, uint32_t event); +typedef int32_t (*bypass_event_show_t)(struct rte_eth_dev *dev, uint32_t event_shift, uint32_t *event); +typedef int32_t (*bypass_wd_timeout_set_t)(struct rte_eth_dev *dev, uint32_t timeout); +typedef int32_t (*bypass_wd_timeout_show_t)(struct rte_eth_dev *dev, uint32_t *wd_timeout); +typedef int32_t (*bypass_ver_show_t)(struct rte_eth_dev *dev, uint32_t *ver); +typedef int32_t (*bypass_wd_reset_t)(struct rte_eth_dev *dev); +#endif + +typedef int (*eth_filter_ctrl_t)(struct rte_eth_dev *dev, + enum rte_filter_type filter_type, + enum rte_filter_op filter_op, + void *arg); +/**< @internal Take operations to assigned filter type on an Ethernet device */ + +typedef int (*eth_get_dcb_info)(struct rte_eth_dev *dev, + struct rte_eth_dcb_info *dcb_info); +/**< @internal Get dcb information on an Ethernet device */ + +/** + * @internal A structure containing the functions exported by an Ethernet driver. + */ +struct eth_dev_ops { + eth_dev_configure_t dev_configure; /**< Configure device. */ + eth_dev_start_t dev_start; /**< Start device. */ + eth_dev_stop_t dev_stop; /**< Stop device. */ + eth_dev_set_link_up_t dev_set_link_up; /**< Device link up. */ + eth_dev_set_link_down_t dev_set_link_down; /**< Device link down. */ + eth_dev_close_t dev_close; /**< Close device. */ + eth_promiscuous_enable_t promiscuous_enable; /**< Promiscuous ON. */ + eth_promiscuous_disable_t promiscuous_disable;/**< Promiscuous OFF. */ + eth_allmulticast_enable_t allmulticast_enable;/**< RX multicast ON. */ + eth_allmulticast_disable_t allmulticast_disable;/**< RX multicast OF. */ + eth_link_update_t link_update; /**< Get device link state. */ + eth_stats_get_t stats_get; /**< Get generic device statistics. */ + eth_stats_reset_t stats_reset; /**< Reset generic device statistics. */ + eth_xstats_get_t xstats_get; /**< Get extended device statistics. */ + eth_xstats_reset_t xstats_reset; /**< Reset extended device statistics. */ + eth_queue_stats_mapping_set_t queue_stats_mapping_set; + /**< Configure per queue stat counter mapping. */ + eth_dev_infos_get_t dev_infos_get; /**< Get device info. */ + eth_dev_supported_ptypes_get_t dev_supported_ptypes_get; + /**< Get packet types supported and identified by device*/ + mtu_set_t mtu_set; /**< Set MTU. */ + vlan_filter_set_t vlan_filter_set; /**< Filter VLAN Setup. */ + vlan_tpid_set_t vlan_tpid_set; /**< Outer VLAN TPID Setup. */ + vlan_strip_queue_set_t vlan_strip_queue_set; /**< VLAN Stripping on queue. */ + vlan_offload_set_t vlan_offload_set; /**< Set VLAN Offload. */ + vlan_pvid_set_t vlan_pvid_set; /**< Set port based TX VLAN insertion */ + eth_queue_start_t rx_queue_start;/**< Start RX for a queue.*/ + eth_queue_stop_t rx_queue_stop;/**< Stop RX for a queue.*/ + eth_queue_start_t tx_queue_start;/**< Start TX for a queue.*/ + eth_queue_stop_t tx_queue_stop;/**< Stop TX for a queue.*/ + eth_rx_queue_setup_t rx_queue_setup;/**< Set up device RX queue.*/ + eth_queue_release_t rx_queue_release;/**< Release RX queue.*/ + eth_rx_queue_count_t rx_queue_count; /**< Get Rx queue count. */ + eth_rx_descriptor_done_t rx_descriptor_done; /**< Check rxd DD bit */ + /**< Enable Rx queue interrupt. */ + eth_rx_enable_intr_t rx_queue_intr_enable; + /**< Disable Rx queue interrupt.*/ + eth_rx_disable_intr_t rx_queue_intr_disable; + eth_tx_queue_setup_t tx_queue_setup;/**< Set up device TX queue.*/ + eth_queue_release_t tx_queue_release;/**< Release TX queue.*/ + eth_dev_led_on_t dev_led_on; /**< Turn on LED. */ + eth_dev_led_off_t dev_led_off; /**< Turn off LED. */ + flow_ctrl_get_t flow_ctrl_get; /**< Get flow control. */ + flow_ctrl_set_t flow_ctrl_set; /**< Setup flow control. */ + priority_flow_ctrl_set_t priority_flow_ctrl_set; /**< Setup priority flow control.*/ + eth_mac_addr_remove_t mac_addr_remove; /**< Remove MAC address */ + eth_mac_addr_add_t mac_addr_add; /**< Add a MAC address */ + eth_mac_addr_set_t mac_addr_set; /**< Set a MAC address */ + eth_uc_hash_table_set_t uc_hash_table_set; /**< Set Unicast Table Array */ + eth_uc_all_hash_table_set_t uc_all_hash_table_set; /**< Set Unicast hash bitmap */ + eth_mirror_rule_set_t mirror_rule_set; /**< Add a traffic mirror rule.*/ + eth_mirror_rule_reset_t mirror_rule_reset; /**< reset a traffic mirror rule.*/ + eth_set_vf_rx_mode_t set_vf_rx_mode; /**< Set VF RX mode */ + eth_set_vf_rx_t set_vf_rx; /**< enable/disable a VF receive */ + eth_set_vf_tx_t set_vf_tx; /**< enable/disable a VF transmit */ + eth_set_vf_vlan_filter_t set_vf_vlan_filter; /**< Set VF VLAN filter */ + /** Add UDP tunnel port. */ + eth_udp_tunnel_port_add_t udp_tunnel_port_add; + /** Del UDP tunnel port. */ + eth_udp_tunnel_port_del_t udp_tunnel_port_del; + eth_set_queue_rate_limit_t set_queue_rate_limit; /**< Set queue rate limit */ + eth_set_vf_rate_limit_t set_vf_rate_limit; /**< Set VF rate limit */ + /** Update redirection table. */ + reta_update_t reta_update; + /** Query redirection table. */ + reta_query_t reta_query; + + eth_get_reg_length_t get_reg_length; + /**< Get # of registers */ + eth_get_reg_t get_reg; + /**< Get registers */ + eth_get_eeprom_length_t get_eeprom_length; + /**< Get eeprom length */ + eth_get_eeprom_t get_eeprom; + /**< Get eeprom data */ + eth_set_eeprom_t set_eeprom; + /**< Set eeprom */ + /* bypass control */ +#ifdef RTE_NIC_BYPASS + bypass_init_t bypass_init; + bypass_state_set_t bypass_state_set; + bypass_state_show_t bypass_state_show; + bypass_event_set_t bypass_event_set; + bypass_event_show_t bypass_event_show; + bypass_wd_timeout_set_t bypass_wd_timeout_set; + bypass_wd_timeout_show_t bypass_wd_timeout_show; + bypass_ver_show_t bypass_ver_show; + bypass_wd_reset_t bypass_wd_reset; +#endif + + /** Configure RSS hash protocols. */ + rss_hash_update_t rss_hash_update; + /** Get current RSS hash configuration. */ + rss_hash_conf_get_t rss_hash_conf_get; + eth_filter_ctrl_t filter_ctrl; + /**< common filter control. */ + eth_set_mc_addr_list_t set_mc_addr_list; /**< set list of mcast addrs */ + eth_rxq_info_get_t rxq_info_get; + /**< retrieve RX queue information. */ + eth_txq_info_get_t txq_info_get; + /**< retrieve TX queue information. */ + /** Turn IEEE1588/802.1AS timestamping on. */ + eth_timesync_enable_t timesync_enable; + /** Turn IEEE1588/802.1AS timestamping off. */ + eth_timesync_disable_t timesync_disable; + /** Read the IEEE1588/802.1AS RX timestamp. */ + eth_timesync_read_rx_timestamp_t timesync_read_rx_timestamp; + /** Read the IEEE1588/802.1AS TX timestamp. */ + eth_timesync_read_tx_timestamp_t timesync_read_tx_timestamp; + + /** Get DCB information */ + eth_get_dcb_info get_dcb_info; + /** Adjust the device clock.*/ + eth_timesync_adjust_time timesync_adjust_time; + /** Get the device clock time. */ + eth_timesync_read_time timesync_read_time; + /** Set the device clock time. */ + eth_timesync_write_time timesync_write_time; + /** Config ether type of l2 tunnel */ + eth_l2_tunnel_eth_type_conf_t l2_tunnel_eth_type_conf; + /** Enable/disable l2 tunnel offload functions */ + eth_l2_tunnel_offload_set_t l2_tunnel_offload_set; +}; + +/** + * Function type used for RX packet processing packet callbacks. + * + * The callback function is called on RX with a burst of packets that have + * been received on the given port and queue. + * + * @param port + * The Ethernet port on which RX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to receive the packets. + * @param pkts + * The burst of packets that have just been received. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param max_pkts + * The max number of packets that can be stored in the "pkts" array. + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets returned to the user. + */ +typedef uint16_t (*rte_rx_callback_fn)(uint8_t port, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, uint16_t max_pkts, + void *user_param); + +/** + * Function type used for TX packet processing packet callbacks. + * + * The callback function is called on TX with a burst of packets immediately + * before the packets are put onto the hardware queue for transmission. + * + * @param port + * The Ethernet port on which TX is being performed. + * @param queue + * The queue on the Ethernet port which is being used to transmit the packets. + * @param pkts + * The burst of packets that are about to be transmitted. + * @param nb_pkts + * The number of packets in the burst pointed to by "pkts". + * @param user_param + * The arbitrary user parameter passed in by the application when the callback + * was originally configured. + * @return + * The number of packets to be written to the NIC. + */ +typedef uint16_t (*rte_tx_callback_fn)(uint8_t port, uint16_t queue, + struct rte_mbuf *pkts[], uint16_t nb_pkts, void *user_param); + +/** + * @internal + * Structure used to hold information about the callbacks to be called for a + * queue on RX and TX. + */ +struct rte_eth_rxtx_callback { + struct rte_eth_rxtx_callback *next; + union{ + rte_rx_callback_fn rx; + rte_tx_callback_fn tx; + } fn; + void *param; +}; + +/** + * The eth device type. + */ +enum rte_eth_dev_type { + RTE_ETH_DEV_UNKNOWN, /**< unknown device type */ + RTE_ETH_DEV_PCI, + /**< Physical function and Virtual function of PCI devices */ + RTE_ETH_DEV_VIRTUAL, /**< non hardware device */ + RTE_ETH_DEV_MAX /**< max value of this enum */ +}; + +/** + * @internal + * The generic data structure associated with each ethernet device. + * + * Pointers to burst-oriented packet receive and transmit functions are + * located at the beginning of the structure, along with the pointer to + * where all the data elements for the particular device are stored in shared + * memory. This split allows the function pointer and driver data to be per- + * process, while the actual configuration data for the device is shared. + */ +struct rte_eth_dev { + eth_rx_burst_t rx_pkt_burst; /**< Pointer to PMD receive function. */ + eth_tx_burst_t tx_pkt_burst; /**< Pointer to PMD transmit function. */ + struct rte_eth_dev_data *data; /**< Pointer to device data */ + const struct eth_driver *driver;/**< Driver for this device */ + const struct eth_dev_ops *dev_ops; /**< Functions exported by PMD */ + struct rte_pci_device *pci_dev; /**< PCI info. supplied by probing */ + /** User application callbacks for NIC interrupts */ + struct rte_eth_dev_cb_list link_intr_cbs; + /** + * User-supplied functions called from rx_burst to post-process + * received packets before passing them to the user + */ + struct rte_eth_rxtx_callback *post_rx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + /** + * User-supplied functions called from tx_burst to pre-process + * received packets before passing them to the driver for transmission. + */ + struct rte_eth_rxtx_callback *pre_tx_burst_cbs[RTE_MAX_QUEUES_PER_PORT]; + uint8_t attached; /**< Flag indicating the port is attached */ + enum rte_eth_dev_type dev_type; /**< Flag indicating the device type */ +}; + +struct rte_eth_dev_sriov { + uint8_t active; /**< SRIOV is active with 16, 32 or 64 pools */ + uint8_t nb_q_per_pool; /**< rx queue number per pool */ + uint16_t def_vmdq_idx; /**< Default pool num used for PF */ + uint16_t def_pool_q_idx; /**< Default pool queue start reg index */ +}; +#define RTE_ETH_DEV_SRIOV(dev) ((dev)->data->sriov) + +#define RTE_ETH_NAME_MAX_LEN (32) + +/** + * @internal + * The data part, with no function pointers, associated with each ethernet device. + * + * This structure is safe to place in shared memory to be common among different + * processes in a multi-process configuration. + */ +struct rte_eth_dev_data { + char name[RTE_ETH_NAME_MAX_LEN]; /**< Unique identifier name */ + + void **rx_queues; /**< Array of pointers to RX queues. */ + void **tx_queues; /**< Array of pointers to TX queues. */ + uint16_t nb_rx_queues; /**< Number of RX queues. */ + uint16_t nb_tx_queues; /**< Number of TX queues. */ + + struct rte_eth_dev_sriov sriov; /**< SRIOV data */ + + void *dev_private; /**< PMD-specific private data */ + + struct rte_eth_link dev_link; + /**< Link-level information & status */ + + struct rte_eth_conf dev_conf; /**< Configuration applied to device. */ + uint16_t mtu; /**< Maximum Transmission Unit. */ + + uint32_t min_rx_buf_size; + /**< Common rx buffer size handled by all queues */ + + uint64_t rx_mbuf_alloc_failed; /**< RX ring mbuf allocation failures. */ + struct ether_addr* mac_addrs;/**< Device Ethernet Link address. */ + uint64_t mac_pool_sel[ETH_NUM_RECEIVE_MAC_ADDR]; + /** bitmap array of associating Ethernet MAC addresses to pools */ + struct ether_addr* hash_mac_addrs; + /** Device Ethernet MAC addresses of hash filtering. */ + uint8_t port_id; /**< Device [external] port identifier. */ + uint8_t promiscuous : 1, /**< RX promiscuous mode ON(1) / OFF(0). */ + scattered_rx : 1, /**< RX of scattered packets is ON(1) / OFF(0) */ + all_multicast : 1, /**< RX all multicast mode ON(1) / OFF(0). */ + dev_started : 1, /**< Device state: STARTED(1) / STOPPED(0). */ + lro : 1; /**< RX LRO is ON(1) / OFF(0) */ + uint8_t rx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint8_t tx_queue_state[RTE_MAX_QUEUES_PER_PORT]; + /** Queues state: STARTED(1) / STOPPED(0) */ + uint32_t dev_flags; /**< Capabilities */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ + int numa_node; /**< NUMA node connection */ + const char *drv_name; /**< Driver name */ +}; + +/** Device supports hotplug detach */ +#define RTE_ETH_DEV_DETACHABLE 0x0001 +/** Device supports link state interrupt */ +#define RTE_ETH_DEV_INTR_LSC 0x0002 +/** Device is a bonded slave */ +#define RTE_ETH_DEV_BONDED_SLAVE 0x0004 + +/** + * @internal + * The pool of *rte_eth_dev* structures. The size of the pool + * is configured at compile-time in the file. + */ +extern struct rte_eth_dev rte_eth_devices[]; + +/** + * Get the total number of Ethernet devices that have been successfully + * initialized by the [matching] Ethernet driver during the PCI probing phase. + * All devices whose port identifier is in the range + * [0, rte_eth_dev_count() - 1] can be operated on by network applications + * immediately after invoking rte_eal_init(). + * If the application unplugs a port using hotplug function, The enabled port + * numbers may be noncontiguous. In the case, the applications need to manage + * enabled port by themselves. + * + * @return + * - The total number of usable Ethernet devices. + */ +uint8_t rte_eth_dev_count(void); + +/** + * @internal + * Returns a ethdev slot specified by the unique identifier name. + * + * @param name + * The pointer to the Unique identifier name for each Ethernet device + * @return + * - The pointer to the ethdev slot, on success. NULL on error + */ +struct rte_eth_dev *rte_eth_dev_allocated(const char *name); + +/** + * @internal + * Allocates a new ethdev slot for an ethernet device and returns the pointer + * to that slot for the driver to use. + * + * @param name Unique identifier name for each Ethernet device + * @param type Device type of this Ethernet device + * @return + * - Slot in the rte_dev_devices array for a new device; + */ +struct rte_eth_dev *rte_eth_dev_allocate(const char *name, + enum rte_eth_dev_type type); + +/** + * @internal + * Release the specified ethdev port. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @return + * - 0 on success, negative on error + */ +int rte_eth_dev_release_port(struct rte_eth_dev *eth_dev); + +/** + * Attach a new Ethernet device specified by aruguments. + * + * @param devargs + * A pointer to a strings array describing the new device + * to be attached. The strings should be a pci address like + * '0000:01:00.0' or virtual device name like 'eth_pcap0'. + * @param port_id + * A pointer to a port identifier actually attached. + * @return + * 0 on success and port_id is filled, negative on error + */ +int rte_eth_dev_attach(const char *devargs, uint8_t *port_id); + +/** + * Detach a Ethernet device specified by port identifier. + * This function must be called when the device is in the + * closed state. + * + * @param port_id + * The port identifier of the device to detach. + * @param devname + * A pointer to a device name actually detached. + * @return + * 0 on success and devname is filled, negative on error + */ +int rte_eth_dev_detach(uint8_t port_id, char *devname); + +struct eth_driver; +/** + * @internal + * Initialization function of an Ethernet driver invoked for each matching + * Ethernet PCI device detected during the PCI probing phase. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * associated with the matching device and which have been [automatically] + * allocated in the *rte_eth_devices* array. + * The *eth_dev* structure is supplied to the driver initialization function + * with the following fields already initialized: + * + * - *pci_dev*: Holds the pointers to the *rte_pci_device* structure which + * contains the generic PCI information of the matching device. + * + * - *driver*: Holds the pointer to the *eth_driver* structure. + * + * - *dev_private*: Holds a pointer to the device private data structure. + * + * - *mtu*: Contains the default Ethernet maximum frame length (1500). + * + * - *port_id*: Contains the port index of the device (actually the index + * of the *eth_dev* structure in the *rte_eth_devices* array). + * + * @return + * - 0: Success, the device is properly initialized by the driver. + * In particular, the driver MUST have set up the *dev_ops* pointer + * of the *eth_dev* structure. + * - <0: Error code of the device initialization failure. + */ +typedef int (*eth_dev_init_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * Finalization function of an Ethernet driver invoked for each matching + * Ethernet PCI device detected during the PCI closing phase. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * associated with the matching device and which have been [automatically] + * allocated in the *rte_eth_devices* array. + * @return + * - 0: Success, the device is properly finalized by the driver. + * In particular, the driver MUST free the *dev_ops* pointer + * of the *eth_dev* structure. + * - <0: Error code of the device initialization failure. + */ +typedef int (*eth_dev_uninit_t)(struct rte_eth_dev *eth_dev); + +/** + * @internal + * The structure associated with a PMD Ethernet driver. + * + * Each Ethernet driver acts as a PCI driver and is represented by a generic + * *eth_driver* structure that holds: + * + * - An *rte_pci_driver* structure (which must be the first field). + * + * - The *eth_dev_init* function invoked for each matching PCI device. + * + * - The *eth_dev_uninit* function invoked for each matching PCI device. + * + * - The size of the private data to allocate for each matching device. + */ +struct eth_driver { + struct rte_pci_driver pci_drv; /**< The PMD is also a PCI driver. */ + eth_dev_init_t eth_dev_init; /**< Device init function. */ + eth_dev_uninit_t eth_dev_uninit; /**< Device uninit function. */ + unsigned int dev_private_size; /**< Size of device private data. */ +}; + +/** + * @internal + * A function invoked by the initialization function of an Ethernet driver + * to simultaneously register itself as a PCI driver and as an Ethernet + * Poll Mode Driver (PMD). + * + * @param eth_drv + * The pointer to the *eth_driver* structure associated with + * the Ethernet driver. + */ +void rte_eth_driver_register(struct eth_driver *eth_drv); + +/** + * Convert a numerical speed in Mbps to a bitmap flag that can be used in + * the bitmap link_speeds of the struct rte_eth_conf + * + * @param speed + * Numerical speed value in Mbps + * @param duplex + * ETH_LINK_[HALF/FULL]_DUPLEX (only for 10/100M speeds) + * @return + * 0 if the speed cannot be mapped + */ +uint32_t rte_eth_speed_bitflag(uint32_t speed, int duplex); + +/** + * Configure an Ethernet device. + * This function must be invoked first before any other function in the + * Ethernet API. This function can also be re-invoked when a device is in the + * stopped state. + * + * @param port_id + * The port identifier of the Ethernet device to configure. + * @param nb_rx_queue + * The number of receive queues to set up for the Ethernet device. + * @param nb_tx_queue + * The number of transmit queues to set up for the Ethernet device. + * @param eth_conf + * The pointer to the configuration data to be used for the Ethernet device. + * The *rte_eth_conf* structure includes: + * - the hardware offload features to activate, with dedicated fields for + * each statically configurable offload hardware feature provided by + * Ethernet devices, such as IP checksum or VLAN tag stripping for + * example. + * - the Receive Side Scaling (RSS) configuration when using multiple RX + * queues per port. + * + * Embedding all configuration information in a single data structure + * is the more flexible method that allows the addition of new features + * without changing the syntax of the API. + * @return + * - 0: Success, device configured. + * - <0: Error code returned by the driver configuration function. + */ +int rte_eth_dev_configure(uint8_t port_id, uint16_t nb_rx_queue, + uint16_t nb_tx_queue, const struct rte_eth_conf *eth_conf); + +/** + * Allocate and set up a receive queue for an Ethernet device. + * + * The function allocates a contiguous block of memory for *nb_rx_desc* + * receive descriptors from a memory zone associated with *socket_id* + * and initializes each receive descriptor with a network buffer allocated + * from the memory pool *mb_pool*. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue to set up. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_rx_desc + * The number of receive descriptors to allocate for the receive ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * The value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the receive descriptors of the ring. + * @param rx_conf + * The pointer to the configuration data to be used for the receive queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *rx_conf* structure contains an *rx_thresh* structure with the values + * of the Prefetch, Host, and Write-Back threshold registers of the receive + * ring. + * @param mb_pool + * The pointer to the memory pool from which to allocate *rte_mbuf* network + * memory buffers to populate each descriptor of the receive ring. + * @return + * - 0: Success, receive queue correctly set up. + * - -EINVAL: The size of network buffers which can be allocated from the + * memory pool does not fit the various buffer sizes allowed by the + * device controller. + * - -ENOMEM: Unable to allocate the receive ring descriptors or to + * allocate network memory buffers from the memory pool when + * initializing receive descriptors. + */ +int rte_eth_rx_queue_setup(uint8_t port_id, uint16_t rx_queue_id, + uint16_t nb_rx_desc, unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mb_pool); + +/** + * Allocate and set up a transmit queue for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue to set up. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param nb_tx_desc + * The number of transmit descriptors to allocate for the transmit ring. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + * Its value can be *SOCKET_ID_ANY* if there is no NUMA constraint for + * the DMA memory allocated for the transmit descriptors of the ring. + * @param tx_conf + * The pointer to the configuration data to be used for the transmit queue. + * NULL value is allowed, in which case default RX configuration + * will be used. + * The *tx_conf* structure contains the following data: + * - The *tx_thresh* structure with the values of the Prefetch, Host, and + * Write-Back threshold registers of the transmit ring. + * When setting Write-Back threshold to the value greater then zero, + * *tx_rs_thresh* value should be explicitly set to one. + * - The *tx_free_thresh* value indicates the [minimum] number of network + * buffers that must be pending in the transmit ring to trigger their + * [implicit] freeing by the driver transmit function. + * - The *tx_rs_thresh* value indicates the [minimum] number of transmit + * descriptors that must be pending in the transmit ring before setting the + * RS bit on a descriptor by the driver transmit function. + * The *tx_rs_thresh* value should be less or equal then + * *tx_free_thresh* value, and both of them should be less then + * *nb_tx_desc* - 3. + * - The *txq_flags* member contains flags to pass to the TX queue setup + * function to configure the behavior of the TX queue. This should be set + * to 0 if no special configuration is required. + * + * Note that setting *tx_free_thresh* or *tx_rs_thresh* value to 0 forces + * the transmit function to use default values. + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -ENOMEM: Unable to allocate the transmit ring descriptors. + */ +int rte_eth_tx_queue_setup(uint8_t port_id, uint16_t tx_queue_id, + uint16_t nb_tx_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); + +/* + * Return the NUMA socket to which an Ethernet device is connected + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * The NUMA socket id to which the Ethernet device is connected or + * a default of zero if the socket could not be determined. + * -1 is returned is the port_id value is out of range. + */ +int rte_eth_dev_socket_id(uint8_t port_id); + +/* + * Check if port_id of device is attached + * + * @param port_id + * The port identifier of the Ethernet device + * @return + * - 0 if port is out of range or not attached + * - 1 if device is attached + */ +int rte_eth_dev_is_valid_port(uint8_t port_id); + +/* + * Allocate mbuf from mempool, setup the DMA physical address + * and then start RX for specified queue of a port. It is used + * when rx_deferred_start flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_start(uint8_t port_id, uint16_t rx_queue_id); + +/* + * Stop specified RX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param rx_queue_id + * The index of the rx queue to update the ring. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_rx_queue_stop(uint8_t port_id, uint16_t rx_queue_id); + +/* + * Start TX for specified queue of a port. It is used when tx_deferred_start + * flag of the specified queue is true. + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_start(uint8_t port_id, uint16_t tx_queue_id); + +/* + * Stop specified TX queue of a port + * + * @param port_id + * The port identifier of the Ethernet device + * @param tx_queue_id + * The index of the tx queue to update the ring. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - 0: Success, the transmit queue is correctly set up. + * - -EINVAL: The port_id or the queue_id out of range. + * - -ENOTSUP: The function not supported in PMD driver. + */ +int rte_eth_dev_tx_queue_stop(uint8_t port_id, uint16_t tx_queue_id); + + + +/** + * Start an Ethernet device. + * + * The device start step is the last one and consists of setting the configured + * offload features and in starting the transmit and the receive units of the + * device. + * On success, all basic functions exported by the Ethernet API (link status, + * receive/transmit, and so on) can be invoked. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device started. + * - <0: Error code of the driver device start function. + */ +int rte_eth_dev_start(uint8_t port_id); + +/** + * Stop an Ethernet device. The device can be restarted with a call to + * rte_eth_dev_start() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_stop(uint8_t port_id); + + +/** + * Link up an Ethernet device. + * + * Set device link up will re-enable the device rx/tx + * functionality after it is previously set device linked down. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - 0: Success, Ethernet device linked up. + * - <0: Error code of the driver device link up function. + */ +int rte_eth_dev_set_link_up(uint8_t port_id); + +/** + * Link down an Ethernet device. + * The device rx/tx functionality will be disabled if success, + * and it can be re-enabled with a call to + * rte_eth_dev_set_link_up() + * + * @param port_id + * The port identifier of the Ethernet device. + */ +int rte_eth_dev_set_link_down(uint8_t port_id); + +/** + * Close a stopped Ethernet device. The device cannot be restarted! + * The function frees all resources except for needed by the + * closed state. To free these resources, call rte_eth_dev_detach(). + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_dev_close(uint8_t port_id); + +/** + * Enable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_enable(uint8_t port_id); + +/** + * Disable receipt in promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_promiscuous_disable(uint8_t port_id); + +/** + * Return the value of promiscuous mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if promiscuous is enabled + * - (0) if promiscuous is disabled. + * - (-1) on error + */ +int rte_eth_promiscuous_get(uint8_t port_id); + +/** + * Enable the receipt of any multicast frame by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_enable(uint8_t port_id); + +/** + * Disable the receipt of all multicast frames by an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_allmulticast_disable(uint8_t port_id); + +/** + * Return the value of allmulticast mode for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (1) if allmulticast is enabled + * - (0) if allmulticast is disabled. + * - (-1) on error + */ +int rte_eth_allmulticast_get(uint8_t port_id); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It might need + * to wait up to 9 seconds in it. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +void rte_eth_link_get(uint8_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the status (ON/OFF), the speed (in Mbps) and the mode (HALF-DUPLEX + * or FULL-DUPLEX) of the physical link of an Ethernet device. It is a no-wait + * version of rte_eth_link_get(). + * + * @param port_id + * The port identifier of the Ethernet device. + * @param link + * A pointer to an *rte_eth_link* structure to be filled with + * the status, the speed and the mode of the Ethernet device link. + */ +void rte_eth_link_get_nowait(uint8_t port_id, struct rte_eth_link *link); + +/** + * Retrieve the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param stats + * A pointer to a structure of type *rte_eth_stats* to be filled with + * the values of device counters for the following set of statistics: + * - *ipackets* with the total of successfully received packets. + * - *opackets* with the total of successfully transmitted packets. + * - *ibytes* with the total of successfully received bytes. + * - *obytes* with the total of successfully transmitted bytes. + * - *ierrors* with the total of erroneous received packets. + * - *oerrors* with the total of failed transmitted packets. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_stats_get(uint8_t port_id, struct rte_eth_stats *stats); + +/** + * Reset the general I/O statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_stats_reset(uint8_t port_id); + +/** + * Retrieve extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param xstats + * A pointer to a table of structure of type *rte_eth_xstats* + * to be filled with device statistics names and values. + * This parameter can be set to NULL if n is 0. + * @param n + * The size of the stats table, which should be large enough to store + * all the statistics of the device. + * @return + * - positive value lower or equal to n: success. The return value + * is the number of entries filled in the stats table. + * - positive value higher than n: error, the given statistics table + * is too small. The return value corresponds to the size that should + * be given to succeed. The entries in the table are not valid and + * shall not be used by the caller. + * - negative value on error (invalid port id) + */ +int rte_eth_xstats_get(uint8_t port_id, struct rte_eth_xstats *xstats, + unsigned n); + +/** + * Reset extended statistics of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + */ +void rte_eth_xstats_reset(uint8_t port_id); + +/** + * Set a mapping for the specified transmit queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tx_queue_id + * The index of the transmit queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the transmit + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_tx_queue_stats_mapping(uint8_t port_id, + uint16_t tx_queue_id, uint8_t stat_idx); + +/** + * Set a mapping for the specified receive queue to the specified per-queue + * statistics counter. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param stat_idx + * The per-queue packet statistics functionality number that the receive + * queue is to be assigned. + * The value must be in the range [0, RTE_MAX_ETHPORT_QUEUE_STATS_MAPS - 1]. + * @return + * Zero if successful. Non-zero otherwise. + */ +int rte_eth_dev_set_rx_queue_stats_mapping(uint8_t port_id, + uint16_t rx_queue_id, + uint8_t stat_idx); + +/** + * Retrieve the Ethernet address of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mac_addr + * A pointer to a structure of type *ether_addr* to be filled with + * the Ethernet address of the Ethernet device. + */ +void rte_eth_macaddr_get(uint8_t port_id, struct ether_addr *mac_addr); + +/** + * Retrieve the contextual information of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dev_info + * A pointer to a structure of type *rte_eth_dev_info* to be filled with + * the contextual information of the Ethernet device. + */ +void rte_eth_dev_info_get(uint8_t port_id, struct rte_eth_dev_info *dev_info); + +/** + * Retrieve the supported packet types of an Ethernet device. + * + * @note + * Better to invoke this API after the device is already started or rx burst + * function is decided, to obtain correct supported ptypes. + * @note + * if a given PMD does not report what ptypes it supports, then the supported + * ptype count is reported as 0. + * @param port_id + * The port identifier of the Ethernet device. + * @param ptype_mask + * A hint of what kind of packet type which the caller is interested in. + * @param ptypes + * An array pointer to store adequent packet types, allocated by caller. + * @param num + * Size of the array pointed by param ptypes. + * @return + * - (>=0) Number of supported ptypes. If the number of types exceeds num, + * only num entries will be filled into the ptypes array, but the full + * count of supported ptypes will be returned. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_supported_ptypes(uint8_t port_id, uint32_t ptype_mask, + uint32_t *ptypes, int num); + +/** + * Retrieve the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A pointer to a uint16_t where the retrieved MTU is to be stored. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_mtu(uint8_t port_id, uint16_t *mtu); + +/** + * Change the MTU of an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mtu + * A uint16_t for the MTU to be applied. + * @return + * - (0) if successful. + * - (-ENOTSUP) if operation is not supported. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if *mtu* invalid. + */ +int rte_eth_dev_set_mtu(uint8_t port_id, uint16_t mtu); + +/** + * Enable/Disable hardware filtering by an Ethernet device of received + * VLAN packets tagged with a given VLAN Tag Identifier. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vlan_id + * The VLAN Tag Identifier whose filtering must be enabled or disabled. + * @param on + * If > 0, enable VLAN filtering of VLAN packets tagged with *vlan_id*. + * Otherwise, disable VLAN filtering of VLAN packets tagged with *vlan_id*. + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOSYS) if VLAN filtering on *port_id* disabled. + * - (-EINVAL) if *vlan_id* > 4095. + */ +int rte_eth_dev_vlan_filter(uint8_t port_id, uint16_t vlan_id, int on); + +/** + * Enable/Disable hardware VLAN Strip by a rx queue of an Ethernet device. + * 82599/X540/X550 can support VLAN stripping at the rx queue level + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rx_queue_id + * The index of the receive queue for which a queue stats mapping is required. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param on + * If 1, Enable VLAN Stripping of the receive queue of the Ethernet port. + * If 0, Disable VLAN Stripping of the receive queue of the Ethernet port. + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN stripping not configured. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if *rx_queue_id* invalid. + */ +int rte_eth_dev_set_vlan_strip_on_queue(uint8_t port_id, uint16_t rx_queue_id, + int on); + +/** + * Set the Outer VLAN Ether Type by an Ethernet device, it can be inserted to + * the VLAN Header. This is a register setup available on some Intel NIC, not + * but all, please check the data sheet for availability. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vlan_type + * The vlan type. + * @param tag_type + * The Tag Protocol ID + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN TPID setup is not supported. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_set_vlan_ether_type(uint8_t port_id, + enum rte_vlan_type vlan_type, + uint16_t tag_type); + +/** + * Set VLAN offload configuration on an Ethernet device + * Enable/Disable Extended VLAN by an Ethernet device, This is a register setup + * available on some Intel NIC, not but all, please check the data sheet for + * availability. + * Enable/Disable VLAN Strip can be done on rx queue for certain NIC, but here + * the configuration is applied on the port level. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param offload_mask + * The VLAN Offload bit mask can be mixed use with "OR" + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * @return + * - (0) if successful. + * - (-ENOSUP) if hardware-assisted VLAN filtering not configured. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_set_vlan_offload(uint8_t port_id, int offload_mask); + +/** + * Read VLAN Offload configuration from an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>0) if successful. Bit mask to indicate + * ETH_VLAN_STRIP_OFFLOAD + * ETH_VLAN_FILTER_OFFLOAD + * ETH_VLAN_EXTEND_OFFLOAD + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_get_vlan_offload(uint8_t port_id); + +/** + * Set port based TX VLAN insersion on or off. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pvid + * Port based TX VLAN identifier togeth with user priority. + * @param on + * Turn on or off the port based TX VLAN insertion. + * + * @return + * - (0) if successful. + * - negative if failed. + */ +int rte_eth_dev_set_vlan_pvid(uint8_t port_id, uint16_t pvid, int on); + +/** + * + * Retrieve a burst of input packets from a receive queue of an Ethernet + * device. The retrieved packets are stored in *rte_mbuf* structures whose + * pointers are supplied in the *rx_pkts* array. + * + * The rte_eth_rx_burst() function loops, parsing the RX ring of the + * receive queue, up to *nb_pkts* packets, and for each completed RX + * descriptor in the ring, it performs the following operations: + * + * - Initialize the *rte_mbuf* data structure associated with the + * RX descriptor according to the information provided by the NIC into + * that RX descriptor. + * + * - Store the *rte_mbuf* data structure into the next entry of the + * *rx_pkts* array. + * + * - Replenish the RX descriptor with a new *rte_mbuf* buffer + * allocated from the memory pool associated with the receive queue at + * initialization time. + * + * When retrieving an input packet that was scattered by the controller + * into multiple receive descriptors, the rte_eth_rx_burst() function + * appends the associated *rte_mbuf* buffers to the first buffer of the + * packet. + * + * The rte_eth_rx_burst() function returns the number of packets + * actually retrieved, which is the number of *rte_mbuf* data structures + * effectively supplied into the *rx_pkts* array. + * A return value equal to *nb_pkts* indicates that the RX queue contained + * at least *rx_pkts* packets, and this is likely to signify that other + * received packets remain in the input queue. Applications implementing + * a "retrieve as much received packets as possible" policy can check this + * specific case and keep invoking the rte_eth_rx_burst() function until + * a value less than *nb_pkts* is returned. + * + * This receive method has the following advantages: + * + * - It allows a run-to-completion network stack engine to retrieve and + * to immediately process received packets in a fast burst-oriented + * approach, avoiding the overhead of unnecessary intermediate packet + * queue/dequeue operations. + * + * - Conversely, it also allows an asynchronous-oriented processing + * method to retrieve bursts of received packets and to immediately + * queue them for further parallel processing by another logical core, + * for instance. However, instead of having received packets being + * individually queued by the driver, this approach allows the invoker + * of the rte_eth_rx_burst() function to queue a burst of retrieved + * packets at a time and therefore dramatically reduce the cost of + * enqueue/dequeue operations per packet. + * + * - It allows the rte_eth_rx_burst() function of the driver to take + * advantage of burst-oriented hardware features (CPU cache, + * prefetch instructions, and so on) to minimize the number of CPU + * cycles per packet. + * + * To summarize, the proposed receive API enables many + * burst-oriented optimizations in both synchronous and asynchronous + * packet processing environments with no overhead in both cases. + * + * The rte_eth_rx_burst() function does not provide any error + * notification to avoid the corresponding overhead. As a hint, the + * upper-level application might check the status of the device link once + * being systematically returned a 0 value for a given number of tries. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param rx_pkts + * The address of an array of pointers to *rte_mbuf* structures that + * must be large enough to store *nb_pkts* pointers in it. + * @param nb_pkts + * The maximum number of packets to retrieve. + * @return + * The number of packets actually retrieved, which is the number + * of pointers to *rte_mbuf* structures effectively supplied to the + * *rx_pkts* array. + */ +static inline uint16_t +rte_eth_rx_burst(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **rx_pkts, const uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->rx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_rx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid RX queue_id=%d\n", queue_id); + return 0; + } +#endif + int16_t nb_rx = (*dev->rx_pkt_burst)(dev->data->rx_queues[queue_id], + rx_pkts, nb_pkts); + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + struct rte_eth_rxtx_callback *cb = dev->post_rx_burst_cbs[queue_id]; + + if (unlikely(cb != NULL)) { + do { + nb_rx = cb->fn.rx(port_id, queue_id, rx_pkts, nb_rx, + nb_pkts, cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return nb_rx; +} + +/** + * Get the number of used descriptors in a specific queue + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @return + * The number of used descriptors in the specific queue, or: + * (-EINVAL) if *port_id* is invalid + * (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_queue_count(uint8_t port_id, uint16_t queue_id) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -EINVAL); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_queue_count, -ENOTSUP); + return (*dev->dev_ops->rx_queue_count)(dev, queue_id); +} + +/** + * Check if the DD bit of the specific RX descriptor in the queue has been set + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue id on the specific port. + * @param offset + * The offset of the descriptor ID from tail. + * @return + * - (1) if the specific DD bit is set. + * - (0) if the specific DD bit is not set. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if the device does not support this function + */ +static inline int +rte_eth_rx_descriptor_done(uint8_t port_id, uint16_t queue_id, uint16_t offset) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, -ENODEV); + RTE_FUNC_PTR_OR_ERR_RET(*dev->dev_ops->rx_descriptor_done, -ENOTSUP); + return (*dev->dev_ops->rx_descriptor_done)( \ + dev->data->rx_queues[queue_id], offset); +} + +/** + * Send a burst of output packets on a transmit queue of an Ethernet device. + * + * The rte_eth_tx_burst() function is invoked to transmit output packets + * on the output queue *queue_id* of the Ethernet device designated by its + * *port_id*. + * The *nb_pkts* parameter is the number of packets to send which are + * supplied in the *tx_pkts* array of *rte_mbuf* structures. + * The rte_eth_tx_burst() function loops, sending *nb_pkts* packets, + * up to the number of transmit descriptors available in the TX ring of the + * transmit queue. + * For each packet to send, the rte_eth_tx_burst() function performs + * the following operations: + * + * - Pick up the next available descriptor in the transmit ring. + * + * - Free the network buffer previously sent with that descriptor, if any. + * + * - Initialize the transmit descriptor with the information provided + * in the *rte_mbuf data structure. + * + * In the case of a segmented packet composed of a list of *rte_mbuf* buffers, + * the rte_eth_tx_burst() function uses several transmit descriptors + * of the ring. + * + * The rte_eth_tx_burst() function returns the number of packets it + * actually sent. A return value equal to *nb_pkts* means that all packets + * have been sent, and this is likely to signify that other output packets + * could be immediately transmitted again. Applications that implement a + * "send as many packets to transmit as possible" policy can check this + * specific case and keep invoking the rte_eth_tx_burst() function until + * a value less than *nb_pkts* is returned. + * + * It is the responsibility of the rte_eth_tx_burst() function to + * transparently free the memory buffers of packets previously sent. + * This feature is driven by the *tx_free_thresh* value supplied to the + * rte_eth_dev_configure() function at device configuration time. + * When the number of free TX descriptors drops below this threshold, the + * rte_eth_tx_burst() function must [attempt to] free the *rte_mbuf* buffers + * of those packets whose transmission was effectively completed. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param tx_pkts + * The address of an array of *nb_pkts* pointers to *rte_mbuf* structures + * which contain the output packets. + * @param nb_pkts + * The maximum number of packets to transmit. + * @return + * The number of output packets actually stored in transmit descriptors of + * the transmit ring. The return value can be less than the value of the + * *tx_pkts* parameter when the transmit ring is full or has been filled up. + */ +static inline uint16_t +rte_eth_tx_burst(uint8_t port_id, uint16_t queue_id, + struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct rte_eth_dev *dev = &rte_eth_devices[port_id]; + +#ifdef RTE_LIBRTE_ETHDEV_DEBUG + RTE_ETH_VALID_PORTID_OR_ERR_RET(port_id, 0); + RTE_FUNC_PTR_OR_ERR_RET(*dev->tx_pkt_burst, 0); + + if (queue_id >= dev->data->nb_tx_queues) { + RTE_PMD_DEBUG_TRACE("Invalid TX queue_id=%d\n", queue_id); + return 0; + } +#endif + +#ifdef RTE_ETHDEV_RXTX_CALLBACKS + struct rte_eth_rxtx_callback *cb = dev->pre_tx_burst_cbs[queue_id]; + + if (unlikely(cb != NULL)) { + do { + nb_pkts = cb->fn.tx(port_id, queue_id, tx_pkts, nb_pkts, + cb->param); + cb = cb->next; + } while (cb != NULL); + } +#endif + + return (*dev->tx_pkt_burst)(dev->data->tx_queues[queue_id], tx_pkts, nb_pkts); +} + +typedef void (*buffer_tx_error_fn)(struct rte_mbuf **unsent, uint16_t count, + void *userdata); + +/** + * Structure used to buffer packets for future TX + * Used by APIs rte_eth_tx_buffer and rte_eth_tx_buffer_flush + */ +struct rte_eth_dev_tx_buffer { + buffer_tx_error_fn error_callback; + void *error_userdata; + uint16_t size; /**< Size of buffer for buffered tx */ + uint16_t length; /**< Number of packets in the array */ + struct rte_mbuf *pkts[]; + /**< Pending packets to be sent on explicit flush or when full */ +}; + +/** + * Calculate the size of the tx buffer. + * + * @param sz + * Number of stored packets. + */ +#define RTE_ETH_TX_BUFFER_SIZE(sz) \ + (sizeof(struct rte_eth_dev_tx_buffer) + (sz) * sizeof(struct rte_mbuf *)) + +/** + * Initialize default values for buffered transmitting + * + * @param buffer + * Tx buffer to be initialized. + * @param size + * Buffer size + * @return + * 0 if no error + */ +int +rte_eth_tx_buffer_init(struct rte_eth_dev_tx_buffer *buffer, uint16_t size); + +/** + * Send any packets queued up for transmission on a port and HW queue + * + * This causes an explicit flush of packets previously buffered via the + * rte_eth_tx_buffer() function. It returns the number of packets successfully + * sent to the NIC, and calls the error callback for any unsent packets. Unless + * explicitly set up otherwise, the default callback simply frees the unsent + * packets back to the owning mempool. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param buffer + * Buffer of packets to be transmit. + * @return + * The number of packets successfully sent to the Ethernet device. The error + * callback is called for any packets which could not be sent. + */ +static inline uint16_t +rte_eth_tx_buffer_flush(uint8_t port_id, uint16_t queue_id, + struct rte_eth_dev_tx_buffer *buffer) +{ + uint16_t sent; + uint16_t to_send = buffer->length; + + if (to_send == 0) + return 0; + + sent = rte_eth_tx_burst(port_id, queue_id, buffer->pkts, to_send); + + buffer->length = 0; + + /* All packets sent, or to be dealt with by callback below */ + if (unlikely(sent != to_send)) + buffer->error_callback(&buffer->pkts[sent], to_send - sent, + buffer->error_userdata); + + return sent; +} + +/** + * Buffer a single packet for future transmission on a port and queue + * + * This function takes a single mbuf/packet and buffers it for later + * transmission on the particular port and queue specified. Once the buffer is + * full of packets, an attempt will be made to transmit all the buffered + * packets. In case of error, where not all packets can be transmitted, a + * callback is called with the unsent packets as a parameter. If no callback + * is explicitly set up, the unsent packets are just freed back to the owning + * mempool. The function returns the number of packets actually sent i.e. + * 0 if no buffer flush occurred, otherwise the number of packets successfully + * flushed + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the transmit queue through which output packets must be + * sent. + * The value must be in the range [0, nb_tx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param buffer + * Buffer used to collect packets to be sent. + * @param tx_pkt + * Pointer to the packet mbuf to be sent. + * @return + * 0 = packet has been buffered for later transmission + * N > 0 = packet has been buffered, and the buffer was subsequently flushed, + * causing N packets to be sent, and the error callback to be called for + * the rest. + */ +static inline uint16_t __attribute__((always_inline)) +rte_eth_tx_buffer(uint8_t port_id, uint16_t queue_id, + struct rte_eth_dev_tx_buffer *buffer, struct rte_mbuf *tx_pkt) +{ + buffer->pkts[buffer->length++] = tx_pkt; + if (buffer->length < buffer->size) + return 0; + + return rte_eth_tx_buffer_flush(port_id, queue_id, buffer); +} + +/** + * Configure a callback for buffered packets which cannot be sent + * + * Register a specific callback to be called when an attempt is made to send + * all packets buffered on an ethernet port, but not all packets can + * successfully be sent. The callback registered here will be called only + * from calls to rte_eth_tx_buffer() and rte_eth_tx_buffer_flush() APIs. + * The default callback configured for each queue by default just frees the + * packets back to the calling mempool. If additional behaviour is required, + * for example, to count dropped packets, or to retry transmission of packets + * which cannot be sent, this function should be used to register a suitable + * callback function to implement the desired behaviour. + * The example callback "rte_eth_count_unsent_packet_callback()" is also + * provided as reference. + * + * @param buffer + * The port identifier of the Ethernet device. + * @param callback + * The function to be used as the callback. + * @param userdata + * Arbitrary parameter to be passed to the callback function + * @return + * 0 on success, or -1 on error with rte_errno set appropriately + */ +int +rte_eth_tx_buffer_set_err_callback(struct rte_eth_dev_tx_buffer *buffer, + buffer_tx_error_fn callback, void *userdata); + +/** + * Callback function for silently dropping unsent buffered packets. + * + * This function can be passed to rte_eth_tx_buffer_set_err_callback() to + * adjust the default behavior when buffered packets cannot be sent. This + * function drops any unsent packets silently and is used by tx buffered + * operations as default behavior. + * + * NOTE: this function should not be called directly, instead it should be used + * as a callback for packet buffering. + * + * NOTE: when configuring this function as a callback with + * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter + * should point to an uint64_t value. + * + * @param pkts + * The previously buffered packets which could not be sent + * @param unsent + * The number of unsent packets in the pkts array + * @param userdata + * Not used + */ +void +rte_eth_tx_buffer_drop_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata); + +/** + * Callback function for tracking unsent buffered packets. + * + * This function can be passed to rte_eth_tx_buffer_set_err_callback() to + * adjust the default behavior when buffered packets cannot be sent. This + * function drops any unsent packets, but also updates a user-supplied counter + * to track the overall number of packets dropped. The counter should be an + * uint64_t variable. + * + * NOTE: this function should not be called directly, instead it should be used + * as a callback for packet buffering. + * + * NOTE: when configuring this function as a callback with + * rte_eth_tx_buffer_set_err_callback(), the final, userdata parameter + * should point to an uint64_t value. + * + * @param pkts + * The previously buffered packets which could not be sent + * @param unsent + * The number of unsent packets in the pkts array + * @param userdata + * Pointer to an uint64_t value, which will be incremented by unsent + */ +void +rte_eth_tx_buffer_count_callback(struct rte_mbuf **pkts, uint16_t unsent, + void *userdata); + +/** + * The eth device event type for interrupt, and maybe others in the future. + */ +enum rte_eth_event_type { + RTE_ETH_EVENT_UNKNOWN, /**< unknown event type */ + RTE_ETH_EVENT_INTR_LSC, /**< lsc interrupt event */ + RTE_ETH_EVENT_QUEUE_STATE, + /**< queue state event (enabled/disabled) */ + RTE_ETH_EVENT_INTR_RESET, + /**< reset interrupt event, sent to VF on PF reset */ + RTE_ETH_EVENT_MAX /**< max value of this enum */ +}; + +typedef void (*rte_eth_dev_cb_fn)(uint8_t port_id, \ + enum rte_eth_event_type event, void *cb_arg); +/**< user application callback to be registered for interrupts */ + + + +/** + * Register a callback function for specific port id. + * + * @param port_id + * Port id. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_register(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * Unregister a callback function for specific port id. + * + * @param port_id + * Port id. + * @param event + * Event interested. + * @param cb_fn + * User supplied callback function to be called. + * @param cb_arg + * Pointer to the parameters for the registered callback. -1 means to + * remove all for the same callback address and same event. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_callback_unregister(uint8_t port_id, + enum rte_eth_event_type event, + rte_eth_dev_cb_fn cb_fn, void *cb_arg); + +/** + * @internal Executes all the user application registered callbacks for + * the specific device. It is for DPDK internal user only. User + * application should not call it directly. + * + * @param dev + * Pointer to struct rte_eth_dev. + * @param event + * Eth device interrupt event type. + * + * @return + * void + */ +void _rte_eth_dev_callback_process(struct rte_eth_dev *dev, + enum rte_eth_event_type event); + +/** + * When there is no rx packet coming in Rx Queue for a long time, we can + * sleep lcore related to RX Queue for power saving, and enable rx interrupt + * to be triggered when rx packect arrives. + * + * The rte_eth_dev_rx_intr_enable() function enables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_rx_intr_enable(uint8_t port_id, uint16_t queue_id); + +/** + * When lcore wakes up from rx interrupt indicating packet coming, disable rx + * interrupt and returns to polling mode. + * + * The rte_eth_dev_rx_intr_disable() function disables rx queue + * interrupt on specific rx queue of a port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_rx_intr_disable(uint8_t port_id, uint16_t queue_id); + +/** + * RX Interrupt control per port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl(uint8_t port_id, int epfd, int op, void *data); + +/** + * RX Interrupt control per queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The index of the receive queue from which to retrieve input packets. + * The value must be in the range [0, nb_rx_queue - 1] previously supplied + * to rte_eth_dev_configure(). + * @param epfd + * Epoll instance fd which the intr vector associated to. + * Using RTE_EPOLL_PER_THREAD allows to use per thread epoll instance. + * @param op + * The operation be performed for the vector. + * Operation type of {RTE_INTR_EVENT_ADD, RTE_INTR_EVENT_DEL}. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_eth_dev_rx_intr_ctl_q(uint8_t port_id, uint16_t queue_id, + int epfd, int op, void *data); + +/** + * Turn on the LED on the Ethernet device. + * This function turns on the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_led_on(uint8_t port_id); + +/** + * Turn off the LED on the Ethernet device. + * This function turns off the LED on the Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if underlying hardware OR driver doesn't support + * that operation. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_led_off(uint8_t port_id); + +/** + * Get current status of the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure where to store the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_flow_ctrl_get(uint8_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet link flow control for Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param fc_conf + * The pointer to the structure of the flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure + */ +int rte_eth_dev_flow_ctrl_set(uint8_t port_id, + struct rte_eth_fc_conf *fc_conf); + +/** + * Configure the Ethernet priority flow control under DCB environment + * for Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param pfc_conf + * The pointer to the structure of the priority flow control parameters. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support priority flow control mode. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter + * - (-EIO) if flow control setup failure + */ +int rte_eth_dev_priority_flow_ctrl_set(uint8_t port_id, + struct rte_eth_pfc_conf *pfc_conf); + +/** + * Add a MAC address to an internal array of addresses used to enable whitelist + * filtering to accept packets only if the destination MAC address matches. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * The MAC address to add. + * @param pool + * VMDq pool index to associate address with (if VMDq is enabled). If VMDq is + * not enabled, this should be set to 0. + * @return + * - (0) if successfully added or *mac_addr" was already added. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port* is invalid. + * - (-ENOSPC) if no more MAC addresses can be added. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_mac_addr_add(uint8_t port, struct ether_addr *mac_addr, + uint32_t pool); + +/** + * Remove a MAC address from the internal array of addresses. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * MAC address to remove. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EADDRINUSE) if attempting to remove the default MAC address + */ +int rte_eth_dev_mac_addr_remove(uint8_t port, struct ether_addr *mac_addr); + +/** + * Set the default MAC address. + * + * @param port + * The port identifier of the Ethernet device. + * @param mac_addr + * New default MAC address. + * @return + * - (0) if successful, or *mac_addr* didn't exist. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port* invalid. + * - (-EINVAL) if MAC address is invalid. + */ +int rte_eth_dev_default_mac_addr_set(uint8_t port, struct ether_addr *mac_addr); + + +/** + * Update Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to update. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_reta_update(uint8_t port, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Query Redirection Table(RETA) of Receive Side Scaling of Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param reta_conf + * RETA to query. + * @param reta_size + * Redirection table size. The table size can be queried by + * rte_eth_dev_info_get(). + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_reta_query(uint8_t port, + struct rte_eth_rss_reta_entry64 *reta_conf, + uint16_t reta_size); + + /** + * Updates unicast hash table for receiving packet with the given destination + * MAC address, and the packet is routed to all VFs for which the RX mode is + * accept packets that match the unicast hash table. + * + * @param port + * The port identifier of the Ethernet device. + * @param addr + * Unicast MAC address. + * @param on + * 1 - Set an unicast hash bit for receiving packets with the MAC address. + * 0 - Clear an unicast hash bit. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_hash_table_set(uint8_t port,struct ether_addr *addr, + uint8_t on); + + /** + * Updates all unicast hash bitmaps for receiving packet with any Unicast + * Ethernet MAC addresses,the packet is routed to all VFs for which the RX + * mode is accept packets that match the unicast hash table. + * + * @param port + * The port identifier of the Ethernet device. + * @param on + * 1 - Set all unicast hash bitmaps for receiving all the Ethernet + * MAC addresses + * 0 - Clear all unicast hash bitmaps + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_uc_all_hash_table_set(uint8_t port,uint8_t on); + + /** + * Set RX L2 Filtering mode of a VF of an Ethernet device. + * + * @param port + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param rx_mode + * The RX mode mask, which is one or more of accepting Untagged Packets, + * packets that match the PFUTA table, Broadcast and Multicast Promiscuous. + * ETH_VMDQ_ACCEPT_UNTAG,ETH_VMDQ_ACCEPT_HASH_UC, + * ETH_VMDQ_ACCEPT_BROADCAST and ETH_VMDQ_ACCEPT_MULTICAST will be used + * in rx_mode. + * @param on + * 1 - Enable a VF RX mode. + * 0 - Disable a VF RX mode. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_set_vf_rxmode(uint8_t port, uint16_t vf, uint16_t rx_mode, + uint8_t on); + +/** +* Enable or disable a VF traffic transmit of the Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic transmit. +* 0 - Disable a VF traffic transmit. +* @return +* - (0) if successful. +* - (-ENODEV) if *port_id* invalid. +* - (-ENOTSUP) if hardware doesn't support. +* - (-EINVAL) if bad parameter. +*/ +int +rte_eth_dev_set_vf_tx(uint8_t port,uint16_t vf, uint8_t on); + +/** +* Enable or disable a VF traffic receive of an Ethernet device. +* +* @param port +* The port identifier of the Ethernet device. +* @param vf +* VF id. +* @param on +* 1 - Enable a VF traffic receive. +* 0 - Disable a VF traffic receive. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_eth_dev_set_vf_rx(uint8_t port,uint16_t vf, uint8_t on); + +/** +* Enable/Disable hardware VF VLAN filtering by an Ethernet device of +* received VLAN packets tagged with a given VLAN Tag Identifier. +* +* @param port id +* The port identifier of the Ethernet device. +* @param vlan_id +* The VLAN Tag Identifier whose filtering must be enabled or disabled. +* @param vf_mask +* Bitmap listing which VFs participate in the VLAN filtering. +* @param vlan_on +* 1 - Enable VFs VLAN filtering. +* 0 - Disable VFs VLAN filtering. +* @return +* - (0) if successful. +* - (-ENOTSUP) if hardware doesn't support. +* - (-ENODEV) if *port_id* invalid. +* - (-EINVAL) if bad parameter. +*/ +int +rte_eth_dev_set_vf_vlan_filter(uint8_t port, uint16_t vlan_id, + uint64_t vf_mask, + uint8_t vlan_on); + +/** + * Set a traffic mirroring rule on an Ethernet device + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mirror_conf + * The pointer to the traffic mirroring structure describing the mirroring rule. + * The *rte_eth_vm_mirror_conf* structure includes the type of mirroring rule, + * destination pool and the value of rule if enable vlan or pool mirroring. + * + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @param on + * 1 - Enable a mirroring rule. + * 0 - Disable a mirroring rule. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if the mr_conf information is not correct. + */ +int rte_eth_mirror_rule_set(uint8_t port_id, + struct rte_eth_mirror_conf *mirror_conf, + uint8_t rule_id, + uint8_t on); + +/** + * Reset a traffic mirroring rule on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rule_id + * The index of traffic mirroring rule, we support four separated rules. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_mirror_rule_reset(uint8_t port_id, + uint8_t rule_id); + +/** + * Set the rate limitation for a queue on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_idx + * The queue id. + * @param tx_rate + * The tx rate in Mbps. Allocated from the total port link speed. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_set_queue_rate_limit(uint8_t port_id, uint16_t queue_idx, + uint16_t tx_rate); + +/** + * Set the rate limitation for a vf on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param vf + * VF id. + * @param tx_rate + * The tx rate allocated from the total link speed for this VF id. + * @param q_msk + * The queue mask which need to set the rate. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this feature. + * - (-ENODEV) if *port_id* invalid. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_set_vf_rate_limit(uint8_t port_id, uint16_t vf, + uint16_t tx_rate, uint64_t q_msk); + +/** + * Initialize bypass logic. This function needs to be called before + * executing any other bypass API. + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_init(uint8_t port); + +/** + * Return bypass state. + * + * @param port + * The port identifier of the Ethernet device. + * @param state + * The return bypass state. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_state_show(uint8_t port, uint32_t *state); + +/** + * Set bypass state + * + * @param port + * The port identifier of the Ethernet device. + * @param new_state + * The current bypass state. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_state_set(uint8_t port, uint32_t *new_state); + +/** + * Return bypass state when given event occurs. + * + * @param port + * The port identifier of the Ethernet device. + * @param event + * The bypass event + * - (1) Main power on (power button is pushed) + * - (2) Auxiliary power on (power supply is being plugged) + * - (3) Main power off (system shutdown and power supply is left plugged in) + * - (4) Auxiliary power off (power supply is being unplugged) + * - (5) Display or set the watchdog timer + * @param state + * The bypass state when given event occurred. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_event_show(uint8_t port, uint32_t event, uint32_t *state); + +/** + * Set bypass state when given event occurs. + * + * @param port + * The port identifier of the Ethernet device. + * @param event + * The bypass event + * - (1) Main power on (power button is pushed) + * - (2) Auxiliary power on (power supply is being plugged) + * - (3) Main power off (system shutdown and power supply is left plugged in) + * - (4) Auxiliary power off (power supply is being unplugged) + * - (5) Display or set the watchdog timer + * @param state + * The assigned state when given event occurs. + * - (1) Normal mode + * - (2) Bypass mode + * - (3) Isolate mode + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_event_store(uint8_t port, uint32_t event, uint32_t state); + +/** + * Set bypass watchdog timeout count. + * + * @param port + * The port identifier of the Ethernet device. + * @param timeout + * The timeout to be set. + * - (0) 0 seconds (timer is off) + * - (1) 1.5 seconds + * - (2) 2 seconds + * - (3) 3 seconds + * - (4) 4 seconds + * - (5) 8 seconds + * - (6) 16 seconds + * - (7) 32 seconds + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_wd_timeout_store(uint8_t port, uint32_t timeout); + +/** + * Get bypass firmware version. + * + * @param port + * The port identifier of the Ethernet device. + * @param ver + * The firmware version + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_ver_show(uint8_t port, uint32_t *ver); + +/** + * Return bypass watchdog timeout in seconds + * + * @param port + * The port identifier of the Ethernet device. + * @param wd_timeout + * The return watchdog timeout. "0" represents timer expired + * - (0) 0 seconds (timer is off) + * - (1) 1.5 seconds + * - (2) 2 seconds + * - (3) 3 seconds + * - (4) 4 seconds + * - (5) 8 seconds + * - (6) 16 seconds + * - (7) 32 seconds + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_wd_timeout_show(uint8_t port, uint32_t *wd_timeout); + +/** + * Reset bypass watchdog timer + * + * @param port + * The port identifier of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_bypass_wd_reset(uint8_t port); + + /** + * Configuration of Receive Side Scaling hash computation of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * The new configuration to use for RSS hash computation on the port. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support. + * - (-EINVAL) if bad parameter. + */ +int rte_eth_dev_rss_hash_update(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Retrieve current configuration of Receive Side Scaling hash computation + * of Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param rss_conf + * Where to store the current RSS hash configuration of the Ethernet device. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support RSS. + */ +int +rte_eth_dev_rss_hash_conf_get(uint8_t port_id, + struct rte_eth_rss_conf *rss_conf); + + /** + * Add UDP tunneling port for a specific type of tunnel. + * The packets with this UDP port will be identified as this type of tunnel. + * Before enabling any offloading function for a tunnel, users can call this API + * to change or add more UDP port for the tunnel. So the offloading function + * can take effect on the packets with the sepcific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_add(uint8_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + + /** + * Delete UDP tunneling port a specific type of tunnel. + * The packets with this UDP port will not be identified as this type of tunnel + * any more. + * Before enabling any offloading function for a tunnel, users can call this API + * to delete a UDP port for the tunnel. So the offloading function will not take + * effect on the packets with the sepcific UDP port. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param tunnel_udp + * UDP tunneling configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_udp_tunnel_port_delete(uint8_t port_id, + struct rte_eth_udp_tunnel *tunnel_udp); + +/** + * Check whether the filter type is supported on an Ethernet device. + * All the supported filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support this filter type. + * - (-ENODEV) if *port_id* invalid. + */ +int rte_eth_dev_filter_supported(uint8_t port_id, enum rte_filter_type filter_type); + +/** + * Take operations to assigned filter type on an Ethernet device. + * All the supported operations and filter types are defined in 'rte_eth_ctrl.h'. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param filter_type + * Filter type. + * @param filter_op + * Type of operation. + * @param arg + * A pointer to arguments defined specifically for the operation. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_filter_ctrl(uint8_t port_id, enum rte_filter_type filter_type, + enum rte_filter_op filter_op, void *arg); + +/** + * Get DCB information on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param dcb_info + * dcb information. + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support. + */ +int rte_eth_dev_get_dcb_info(uint8_t port_id, + struct rte_eth_dcb_info *dcb_info); + +/** + * Add a callback to be called on packet RX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets received on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_rx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +void *rte_eth_add_rx_callback(uint8_t port_id, uint16_t queue_id, + rte_rx_callback_fn fn, void *user_param); + +/** + * Add a callback to be called on packet TX on a given port and queue. + * + * This API configures a function to be called for each burst of + * packets sent on a given NIC port queue. The return value is a pointer + * that can be used to later remove the callback using + * rte_eth_remove_tx_callback(). + * + * Multiple functions are called in the order that they are added. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device on which the callback is to be added. + * @param fn + * The callback function + * @param user_param + * A generic pointer parameter which will be passed to each invocation of the + * callback function on this port and queue. + * + * @return + * NULL on error. + * On success, a pointer value which can later be used to remove the callback. + */ +void *rte_eth_add_tx_callback(uint8_t port_id, uint16_t queue_id, + rte_tx_callback_fn fn, void *user_param); + +/** + * Remove an RX packet callback from a given port and queue. + * + * This function is used to removed callbacks that were added to a NIC port + * queue using rte_eth_add_rx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since the it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue. + * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_rx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_rx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb); + +/** + * Remove a TX packet callback from a given port and queue. + * + * This function is used to removed callbacks that were added to a NIC port + * queue using rte_eth_add_tx_callback(). + * + * Note: the callback is removed from the callback list but it isn't freed + * since the it may still be in use. The memory for the callback can be + * subsequently freed back by the application by calling rte_free(): + * + * - Immediately - if the port is stopped, or the user knows that no + * callbacks are in flight e.g. if called from the thread doing RX/TX + * on that queue. + * + * - After a short delay - where the delay is sufficient to allow any + * in-flight callbacks to complete. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The queue on the Ethernet device from which the callback is to be removed. + * @param user_cb + * User supplied callback created via rte_eth_add_tx_callback(). + * + * @return + * - 0: Success. Callback was removed. + * - -ENOTSUP: Callback support is not available. + * - -EINVAL: The port_id or the queue_id is out of range, or the callback + * is NULL or not found for the port/queue. + */ +int rte_eth_remove_tx_callback(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxtx_callback *user_cb); + +/** + * Retrieve information about given port's RX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The RX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_rxq_info_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_rx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_rxq_info *qinfo); + +/** + * Retrieve information about given port's TX queue. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param queue_id + * The TX queue on the Ethernet device for which information + * will be retrieved. + * @param qinfo + * A pointer to a structure of type *rte_eth_txq_info_info* to be filled with + * the information of the Ethernet device. + * + * @return + * - 0: Success + * - -ENOTSUP: routine is not supported by the device PMD. + * - -EINVAL: The port_id or the queue_id is out of range. + */ +int rte_eth_tx_queue_info_get(uint8_t port_id, uint16_t queue_id, + struct rte_eth_txq_info *qinfo); + +/* + * Retrieve number of available registers for access + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) number of registers if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_length(uint8_t port_id); + +/** + * Retrieve device registers and register attributes + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for register data and attribute to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_reg_info(uint8_t port_id, struct rte_dev_reg_info *info); + +/** + * Retrieve size of device EEPROM + * + * @param port_id + * The port identifier of the Ethernet device. + * @return + * - (>=0) EEPROM size if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom_length(uint8_t port_id); + +/** + * Retrieve EEPROM and EEPROM attribute + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes buffer for return EEPROM data and + * EEPROM attributes to be filled. + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_get_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Program EEPROM with provided data + * + * @param port_id + * The port identifier of the Ethernet device. + * @param info + * The template includes EEPROM data for programming and + * EEPROM attributes to be filled + * @return + * - (0) if successful. + * - (-ENOTSUP) if hardware doesn't support. + * - (-ENODEV) if *port_id* invalid. + * - others depends on the specific operations implementation. + */ +int rte_eth_dev_set_eeprom(uint8_t port_id, struct rte_dev_eeprom_info *info); + +/** + * Set the list of multicast addresses to filter on an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param mc_addr_set + * The array of multicast addresses to set. Equal to NULL when the function + * is invoked to flush the set of filtered addresses. + * @param nb_mc_addr + * The number of multicast addresses in the *mc_addr_set* array. Equal to 0 + * when the function is invoked to flush the set of filtered addresses. + * @return + * - (0) if successful. + * - (-ENODEV) if *port_id* invalid. + * - (-ENOTSUP) if PMD of *port_id* doesn't support multicast filtering. + * - (-ENOSPC) if *port_id* has not enough multicast filtering resources. + */ +int rte_eth_dev_set_mc_addr_list(uint8_t port_id, + struct ether_addr *mc_addr_set, + uint32_t nb_mc_addr); + +/** + * Enable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_enable(uint8_t port_id); + +/** + * Disable IEEE1588/802.1AS timestamping for an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_disable(uint8_t port_id); + +/** + * Read an IEEE1588/802.1AS RX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * @param flags + * Device specific flags. Used to pass the RX timesync register index to + * i40e. Unused in igb/ixgbe, pass 0 instead. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_read_rx_timestamp(uint8_t port_id, + struct timespec *timestamp, uint32_t flags); + +/** + * Read an IEEE1588/802.1AS TX timestamp from an Ethernet device. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param timestamp + * Pointer to the timestamp struct. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_read_tx_timestamp(uint8_t port_id, + struct timespec *timestamp); + +/** + * Adjust the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param delta + * The adjustment in nanoseconds. + * + * @return + * - 0: Success. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_adjust_time(uint8_t port_id, int64_t delta); + +/** + * Read the time from the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + */ +int rte_eth_timesync_read_time(uint8_t port_id, struct timespec *time); + +/** + * Set the time of the timesync clock on an Ethernet device. + * + * This is usually used in conjunction with other Ethdev timesync functions to + * synchronize the device time using the IEEE1588/802.1AS protocol. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param time + * Pointer to the timespec struct that holds the time. + * + * @return + * - 0: Success. + * - -EINVAL: No timestamp is available. + * - -ENODEV: The port ID is invalid. + * - -ENOTSUP: The function is not supported by the Ethernet driver. + */ +int rte_eth_timesync_write_time(uint8_t port_id, const struct timespec *time); + +/** + * Copy pci device info to the Ethernet device data. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure. + * @param pci_dev + * The *pci_dev* pointer is the address of the *rte_pci_device* structure. + * + * @return + * - 0 on success, negative on error + */ +void rte_eth_copy_pci_info(struct rte_eth_dev *eth_dev, + struct rte_pci_device *pci_dev); + +/** + * Create memzone for HW rings. + * malloc can't be used as the physical address is needed. + * If the memzone is already created, then this function returns a ptr + * to the old one. + * + * @param eth_dev + * The *eth_dev* pointer is the address of the *rte_eth_dev* structure + * @param name + * The name of the memory zone + * @param queue_id + * The index of the queue to add to name + * @param size + * The sizeof of the memory area + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param socket_id + * The *socket_id* argument is the socket identifier in case of NUMA. + */ +const struct rte_memzone * +rte_eth_dma_zone_reserve(const struct rte_eth_dev *eth_dev, const char *name, + uint16_t queue_id, size_t size, + unsigned align, int socket_id); + +/** + * Config l2 tunnel ether type of an Ethernet device for filtering specific + * tunnel packets by ether type. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel configuration. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_eth_type_conf(uint8_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel); + +/** + * Enable/disable l2 tunnel offload functions. Include, + * 1, The ability of parsing a type of l2 tunnel of an Ethernet device. + * Filtering, forwarding and offloading this type of tunnel packets depend on + * this ability. + * 2, Stripping the l2 tunnel tag. + * 3, Insertion of the l2 tunnel tag. + * 4, Forwarding the packets based on the l2 tunnel tag. + * + * @param port_id + * The port identifier of the Ethernet device. + * @param l2_tunnel + * l2 tunnel parameters. + * @param mask + * Indicate the offload function. + * @param en + * Enable or disable this function. + * + * @return + * - (0) if successful. + * - (-ENODEV) if port identifier is invalid. + * - (-ENOTSUP) if hardware doesn't support tunnel type. + */ +int +rte_eth_dev_l2_tunnel_offload_set(uint8_t port_id, + struct rte_eth_l2_tunnel_conf *l2_tunnel, + uint32_t mask, + uint8_t en); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHDEV_H_ */ diff --git a/lib/librte_ether/rte_ether.h b/lib/librte_ether/rte_ether.h new file mode 100644 index 00000000..1d62d8e5 --- /dev/null +++ b/lib/librte_ether/rte_ether.h @@ -0,0 +1,416 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ETHER_H_ +#define _RTE_ETHER_H_ + +/** + * @file + * + * Ethernet Helpers in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include +#include +#include + +#define ETHER_ADDR_LEN 6 /**< Length of Ethernet address. */ +#define ETHER_TYPE_LEN 2 /**< Length of Ethernet type field. */ +#define ETHER_CRC_LEN 4 /**< Length of Ethernet CRC. */ +#define ETHER_HDR_LEN \ + (ETHER_ADDR_LEN * 2 + ETHER_TYPE_LEN) /**< Length of Ethernet header. */ +#define ETHER_MIN_LEN 64 /**< Minimum frame len, including CRC. */ +#define ETHER_MAX_LEN 1518 /**< Maximum frame len, including CRC. */ +#define ETHER_MTU \ + (ETHER_MAX_LEN - ETHER_HDR_LEN - ETHER_CRC_LEN) /**< Ethernet MTU. */ + +#define ETHER_MAX_VLAN_FRAME_LEN \ + (ETHER_MAX_LEN + 4) /**< Maximum VLAN frame length, including CRC. */ + +#define ETHER_MAX_JUMBO_FRAME_LEN \ + 0x3F00 /**< Maximum Jumbo frame length, including CRC. */ + +#define ETHER_MAX_VLAN_ID 4095 /**< Maximum VLAN ID. */ + +#define ETHER_MIN_MTU 68 /**< Minimum MTU for IPv4 packets, see RFC 791. */ + +/** + * Ethernet address: + * A universally administered address is uniquely assigned to a device by its + * manufacturer. The first three octets (in transmission order) contain the + * Organizationally Unique Identifier (OUI). The following three (MAC-48 and + * EUI-48) octets are assigned by that organization with the only constraint + * of uniqueness. + * A locally administered address is assigned to a device by a network + * administrator and does not contain OUIs. + * See http://standards.ieee.org/regauth/groupmac/tutorial.html + */ +struct ether_addr { + uint8_t addr_bytes[ETHER_ADDR_LEN]; /**< Address bytes in transmission order */ +} __attribute__((__packed__)); + +#define ETHER_LOCAL_ADMIN_ADDR 0x02 /**< Locally assigned Eth. address. */ +#define ETHER_GROUP_ADDR 0x01 /**< Multicast or broadcast Eth. address. */ + +/** + * Check if two Ethernet addresses are the same. + * + * @param ea1 + * A pointer to the first ether_addr structure containing + * the ethernet address. + * @param ea2 + * A pointer to the second ether_addr structure containing + * the ethernet address. + * + * @return + * True (1) if the given two ethernet address are the same; + * False (0) otherwise. + */ +static inline int is_same_ether_addr(const struct ether_addr *ea1, + const struct ether_addr *ea2) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea1->addr_bytes[i] != ea2->addr_bytes[i]) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is filled with zeros; + * false (0) otherwise. + */ +static inline int is_zero_ether_addr(const struct ether_addr *ea) +{ + int i; + for (i = 0; i < ETHER_ADDR_LEN; i++) + if (ea->addr_bytes[i] != 0x00) + return 0; + return 1; +} + +/** + * Check if an Ethernet address is a unicast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a unicast address; + * false (0) otherwise. + */ +static inline int is_unicast_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_GROUP_ADDR) == 0; +} + +/** + * Check if an Ethernet address is a multicast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a multicast address; + * false (0) otherwise. + */ +static inline int is_multicast_ether_addr(const struct ether_addr *ea) +{ + return ea->addr_bytes[0] & ETHER_GROUP_ADDR; +} + +/** + * Check if an Ethernet address is a broadcast address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a broadcast address; + * false (0) otherwise. + */ +static inline int is_broadcast_ether_addr(const struct ether_addr *ea) +{ + const unaligned_uint16_t *ea_words = (const unaligned_uint16_t *)ea; + + return (ea_words[0] == 0xFFFF && ea_words[1] == 0xFFFF && + ea_words[2] == 0xFFFF); +} + +/** + * Check if an Ethernet address is a universally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a universally assigned address; + * false (0) otherwise. + */ +static inline int is_universal_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) == 0; +} + +/** + * Check if an Ethernet address is a locally assigned address. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is a locally assigned address; + * false (0) otherwise. + */ +static inline int is_local_admin_ether_addr(const struct ether_addr *ea) +{ + return (ea->addr_bytes[0] & ETHER_LOCAL_ADMIN_ADDR) != 0; +} + +/** + * Check if an Ethernet address is a valid address. Checks that the address is a + * unicast address and is not filled with zeros. + * + * @param ea + * A pointer to a ether_addr structure containing the ethernet address + * to check. + * @return + * True (1) if the given ethernet address is valid; + * false (0) otherwise. + */ +static inline int is_valid_assigned_ether_addr(const struct ether_addr *ea) +{ + return is_unicast_ether_addr(ea) && (! is_zero_ether_addr(ea)); +} + +/** + * Generate a random Ethernet address that is locally administered + * and not multicast. + * @param addr + * A pointer to Ethernet address. + */ +static inline void eth_random_addr(uint8_t *addr) +{ + uint64_t rand = rte_rand(); + uint8_t *p = (uint8_t*)&rand; + + rte_memcpy(addr, p, ETHER_ADDR_LEN); + addr[0] &= ~ETHER_GROUP_ADDR; /* clear multicast bit */ + addr[0] |= ETHER_LOCAL_ADMIN_ADDR; /* set local assignment bit */ +} + +/** + * Fast copy an Ethernet address. + * + * @param ea_from + * A pointer to a ether_addr structure holding the Ethernet address to copy. + * @param ea_to + * A pointer to a ether_addr structure where to copy the Ethernet address. + */ +static inline void ether_addr_copy(const struct ether_addr *ea_from, + struct ether_addr *ea_to) +{ +#ifdef __INTEL_COMPILER + uint16_t *from_words = (uint16_t *)(ea_from->addr_bytes); + uint16_t *to_words = (uint16_t *)(ea_to->addr_bytes); + + to_words[0] = from_words[0]; + to_words[1] = from_words[1]; + to_words[2] = from_words[2]; +#else + /* + * Use the common way, because of a strange gcc warning. + */ + *ea_to = *ea_from; +#endif +} + +#define ETHER_ADDR_FMT_SIZE 18 +/** + * Format 48bits Ethernet address in pattern xx:xx:xx:xx:xx:xx. + * + * @param buf + * A pointer to buffer contains the formatted MAC address. + * @param size + * The format buffer size. + * @param eth_addr + * A pointer to a ether_addr structure. + */ +static inline void +ether_format_addr(char *buf, uint16_t size, + const struct ether_addr *eth_addr) +{ + snprintf(buf, size, "%02X:%02X:%02X:%02X:%02X:%02X", + eth_addr->addr_bytes[0], + eth_addr->addr_bytes[1], + eth_addr->addr_bytes[2], + eth_addr->addr_bytes[3], + eth_addr->addr_bytes[4], + eth_addr->addr_bytes[5]); +} + +/** + * Ethernet header: Contains the destination address, source address + * and frame type. + */ +struct ether_hdr { + struct ether_addr d_addr; /**< Destination address. */ + struct ether_addr s_addr; /**< Source address. */ + uint16_t ether_type; /**< Frame type. */ +} __attribute__((__packed__)); + +/** + * Ethernet VLAN Header. + * Contains the 16-bit VLAN Tag Control Identifier and the Ethernet type + * of the encapsulated frame. + */ +struct vlan_hdr { + uint16_t vlan_tci; /**< Priority (3) + CFI (1) + Identifier Code (12) */ + uint16_t eth_proto;/**< Ethernet type of encapsulated frame. */ +} __attribute__((__packed__)); + +/** + * VXLAN protocol header. + * Contains the 8-bit flag, 24-bit VXLAN Network Identifier and + * Reserved fields (24 bits and 8 bits) + */ +struct vxlan_hdr { + uint32_t vx_flags; /**< flag (8) + Reserved (24). */ + uint32_t vx_vni; /**< VNI (24) + Reserved (8). */ +} __attribute__((__packed__)); + +/* Ethernet frame types */ +#define ETHER_TYPE_IPv4 0x0800 /**< IPv4 Protocol. */ +#define ETHER_TYPE_IPv6 0x86DD /**< IPv6 Protocol. */ +#define ETHER_TYPE_ARP 0x0806 /**< Arp Protocol. */ +#define ETHER_TYPE_RARP 0x8035 /**< Reverse Arp Protocol. */ +#define ETHER_TYPE_VLAN 0x8100 /**< IEEE 802.1Q VLAN tagging. */ +#define ETHER_TYPE_1588 0x88F7 /**< IEEE 802.1AS 1588 Precise Time Protocol. */ +#define ETHER_TYPE_SLOW 0x8809 /**< Slow protocols (LACP and Marker). */ +#define ETHER_TYPE_TEB 0x6558 /**< Transparent Ethernet Bridging. */ + +#define ETHER_VXLAN_HLEN (sizeof(struct udp_hdr) + sizeof(struct vxlan_hdr)) +/**< VXLAN tunnel header length. */ + +/** + * Extract VLAN tag information into mbuf + * + * Software version of VLAN stripping + * + * @param m + * The packet mbuf. + * @return + * - 0: Success + * - 1: not a vlan packet + */ +static inline int rte_vlan_strip(struct rte_mbuf *m) +{ + struct ether_hdr *eh + = rte_pktmbuf_mtod(m, struct ether_hdr *); + + if (eh->ether_type != rte_cpu_to_be_16(ETHER_TYPE_VLAN)) + return -1; + + struct vlan_hdr *vh = (struct vlan_hdr *)(eh + 1); + m->ol_flags |= PKT_RX_VLAN_PKT; + m->vlan_tci = rte_be_to_cpu_16(vh->vlan_tci); + + /* Copy ether header over rather than moving whole packet */ + memmove(rte_pktmbuf_adj(m, sizeof(struct vlan_hdr)), + eh, 2 * ETHER_ADDR_LEN); + + return 0; +} + +/** + * Insert VLAN tag into mbuf. + * + * Software version of VLAN unstripping + * + * @param m + * The packet mbuf. + * @return + * - 0: On success + * -EPERM: mbuf is is shared overwriting would be unsafe + * -ENOSPC: not enough headroom in mbuf + */ +static inline int rte_vlan_insert(struct rte_mbuf **m) +{ + struct ether_hdr *oh, *nh; + struct vlan_hdr *vh; + + /* Can't insert header if mbuf is shared */ + if (rte_mbuf_refcnt_read(*m) > 1) { + struct rte_mbuf *copy; + + copy = rte_pktmbuf_clone(*m, (*m)->pool); + if (unlikely(copy == NULL)) + return -ENOMEM; + rte_pktmbuf_free(*m); + *m = copy; + } + + oh = rte_pktmbuf_mtod(*m, struct ether_hdr *); + nh = (struct ether_hdr *) + rte_pktmbuf_prepend(*m, sizeof(struct vlan_hdr)); + if (nh == NULL) + return -ENOSPC; + + memmove(nh, oh, 2 * ETHER_ADDR_LEN); + nh->ether_type = rte_cpu_to_be_16(ETHER_TYPE_VLAN); + + vh = (struct vlan_hdr *) (nh + 1); + vh->vlan_tci = rte_cpu_to_be_16((*m)->vlan_tci); + + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ETHER_H_ */ diff --git a/lib/librte_ether/rte_ether_version.map b/lib/librte_ether/rte_ether_version.map new file mode 100644 index 00000000..214ecc73 --- /dev/null +++ b/lib/librte_ether/rte_ether_version.map @@ -0,0 +1,134 @@ +DPDK_2.2 { + global: + + _rte_eth_dev_callback_process; + rte_eth_add_rx_callback; + rte_eth_add_tx_callback; + rte_eth_allmulticast_disable; + rte_eth_allmulticast_enable; + rte_eth_allmulticast_get; + rte_eth_copy_pci_info; + rte_eth_dev_allocate; + rte_eth_dev_allocated; + rte_eth_dev_attach; + rte_eth_dev_bypass_event_show; + rte_eth_dev_bypass_event_store; + rte_eth_dev_bypass_init; + rte_eth_dev_bypass_state_set; + rte_eth_dev_bypass_state_show; + rte_eth_dev_bypass_ver_show; + rte_eth_dev_bypass_wd_reset; + rte_eth_dev_bypass_wd_timeout_show; + rte_eth_dev_callback_process; + rte_eth_dev_callback_register; + rte_eth_dev_callback_unregister; + rte_eth_dev_close; + rte_eth_dev_configure; + rte_eth_dev_count; + rte_eth_dev_default_mac_addr_set; + rte_eth_dev_detach; + rte_eth_dev_filter_ctrl; + rte_eth_dev_filter_supported; + rte_eth_dev_flow_ctrl_get; + rte_eth_dev_flow_ctrl_set; + rte_eth_dev_get_dcb_info; + rte_eth_dev_get_eeprom; + rte_eth_dev_get_eeprom_length; + rte_eth_dev_get_mtu; + rte_eth_dev_get_reg_info; + rte_eth_dev_get_reg_length; + rte_eth_dev_get_vlan_offload; + rte_eth_devices; + rte_eth_dev_info_get; + rte_eth_dev_is_valid_port; + rte_eth_dev_mac_addr_add; + rte_eth_dev_mac_addr_remove; + rte_eth_dev_priority_flow_ctrl_set; + rte_eth_dev_release_port; + rte_eth_dev_rss_hash_conf_get; + rte_eth_dev_rss_hash_update; + rte_eth_dev_rss_reta_query; + rte_eth_dev_rss_reta_update; + rte_eth_dev_rx_intr_ctl; + rte_eth_dev_rx_intr_ctl_q; + rte_eth_dev_rx_intr_disable; + rte_eth_dev_rx_intr_enable; + rte_eth_dev_rx_queue_start; + rte_eth_dev_rx_queue_stop; + rte_eth_dev_set_eeprom; + rte_eth_dev_set_link_down; + rte_eth_dev_set_link_up; + rte_eth_dev_set_mc_addr_list; + rte_eth_dev_set_mtu; + rte_eth_dev_set_rx_queue_stats_mapping; + rte_eth_dev_set_tx_queue_stats_mapping; + rte_eth_dev_set_vf_rx; + rte_eth_dev_set_vf_rxmode; + rte_eth_dev_set_vf_tx; + rte_eth_dev_set_vf_vlan_filter; + rte_eth_dev_set_vlan_ether_type; + rte_eth_dev_set_vlan_offload; + rte_eth_dev_set_vlan_pvid; + rte_eth_dev_set_vlan_strip_on_queue; + rte_eth_dev_socket_id; + rte_eth_dev_start; + rte_eth_dev_stop; + rte_eth_dev_tx_queue_start; + rte_eth_dev_tx_queue_stop; + rte_eth_dev_uc_all_hash_table_set; + rte_eth_dev_uc_hash_table_set; + rte_eth_dev_vlan_filter; + rte_eth_dev_wd_timeout_store; + rte_eth_dma_zone_reserve; + rte_eth_driver_register; + rte_eth_led_off; + rte_eth_led_on; + rte_eth_link; + rte_eth_link_get; + rte_eth_link_get_nowait; + rte_eth_macaddr_get; + rte_eth_mirror_rule_reset; + rte_eth_mirror_rule_set; + rte_eth_promiscuous_disable; + rte_eth_promiscuous_enable; + rte_eth_promiscuous_get; + rte_eth_remove_rx_callback; + rte_eth_remove_tx_callback; + rte_eth_rx_queue_info_get; + rte_eth_rx_queue_setup; + rte_eth_set_queue_rate_limit; + rte_eth_set_vf_rate_limit; + rte_eth_stats; + rte_eth_stats_get; + rte_eth_stats_reset; + rte_eth_timesync_adjust_time; + rte_eth_timesync_disable; + rte_eth_timesync_enable; + rte_eth_timesync_read_rx_timestamp; + rte_eth_timesync_read_time; + rte_eth_timesync_read_tx_timestamp; + rte_eth_timesync_write_time; + rte_eth_tx_queue_info_get; + rte_eth_tx_queue_setup; + rte_eth_xstats_get; + rte_eth_xstats_reset; + + local: *; +}; + +DPDK_16.04 { + global: + + rte_eth_dev_get_supported_ptypes; + rte_eth_dev_l2_tunnel_eth_type_conf; + rte_eth_dev_l2_tunnel_offload_set; + rte_eth_dev_set_vlan_ether_type; + rte_eth_dev_udp_tunnel_port_add; + rte_eth_dev_udp_tunnel_port_delete; + rte_eth_speed_bitflag; + rte_eth_tx_buffer_count_callback; + rte_eth_tx_buffer_drop_callback; + rte_eth_tx_buffer_init; + rte_eth_tx_buffer_set_err_callback; + +} DPDK_2.2; diff --git a/lib/librte_hash/Makefile b/lib/librte_hash/Makefile new file mode 100644 index 00000000..bb1ea990 --- /dev/null +++ b/lib/librte_hash/Makefile @@ -0,0 +1,61 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_hash.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_hash_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_HASH) := rte_cuckoo_hash.c +SRCS-$(CONFIG_RTE_LIBRTE_HASH) += rte_fbk_hash.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include := rte_hash.h +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_hash_crc.h +ifeq ($(CONFIG_RTE_ARCH_ARM64),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_crc_arm64.h +endif +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_jhash.h +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_thash.h +SYMLINK-$(CONFIG_RTE_LIBRTE_HASH)-include += rte_fbk_hash.h + +# this lib needs eal and ring +DEPDIRS-$(CONFIG_RTE_LIBRTE_HASH) += lib/librte_eal lib/librte_ring + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_hash/rte_cmp_arm64.h b/lib/librte_hash/rte_cmp_arm64.h new file mode 100644 index 00000000..6fd937b1 --- /dev/null +++ b/lib/librte_hash/rte_cmp_arm64.h @@ -0,0 +1,114 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ +static int +rte_hash_k16_cmp_eq(const void *key1, const void *key2, + size_t key_len __rte_unused) +{ + uint64_t x0, x1, y0, y1; + + asm volatile( + "ldp %x[x1], %x[x0], [%x[p1]]" + : [x1]"=r"(x1), [x0]"=r"(x0) + : [p1]"r"(key1) + ); + asm volatile( + "ldp %x[y1], %x[y0], [%x[p2]]" + : [y1]"=r"(y1), [y0]"=r"(y0) + : [p2]"r"(key2) + ); + x0 ^= y0; + x1 ^= y1; + return !(x0 == 0 && x1 == 0); +} + +static int +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len); +} + +static int +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static int +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k32_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static int +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static int +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static int +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 96, + (const char *) key2 + 96, key_len); +} + +static int +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k64_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} diff --git a/lib/librte_hash/rte_cmp_x86.h b/lib/librte_hash/rte_cmp_x86.h new file mode 100644 index 00000000..e8c484d6 --- /dev/null +++ b/lib/librte_hash/rte_cmp_x86.h @@ -0,0 +1,109 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* Functions to compare multiple of 16 byte keys (up to 128 bytes) */ +static int +rte_hash_k16_cmp_eq(const void *key1, const void *key2, size_t key_len __rte_unused) +{ + const __m128i k1 = _mm_loadu_si128((const __m128i *) key1); + const __m128i k2 = _mm_loadu_si128((const __m128i *) key2); +#ifdef RTE_MACHINE_CPUFLAG_SSE4_1 + const __m128i x = _mm_xor_si128(k1, k2); + + return !_mm_test_all_zeros(x, x); +#else + const __m128i x = _mm_cmpeq_epi32(k1, k2); + + return _mm_movemask_epi8(x) != 0xffff; +#endif +} + +static int +rte_hash_k32_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len); +} + +static int +rte_hash_k48_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k16_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 16, + (const char *) key2 + 16, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static int +rte_hash_k64_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k32_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 32, + (const char *) key2 + 32, key_len); +} + +static int +rte_hash_k80_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static int +rte_hash_k96_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} + +static int +rte_hash_k112_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k32_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len) || + rte_hash_k16_cmp_eq((const char *) key1 + 96, + (const char *) key2 + 96, key_len); +} + +static int +rte_hash_k128_cmp_eq(const void *key1, const void *key2, size_t key_len) +{ + return rte_hash_k64_cmp_eq(key1, key2, key_len) || + rte_hash_k64_cmp_eq((const char *) key1 + 64, + (const char *) key2 + 64, key_len); +} diff --git a/lib/librte_hash/rte_crc_arm64.h b/lib/librte_hash/rte_crc_arm64.h new file mode 100644 index 00000000..7dd6334e --- /dev/null +++ b/lib/librte_hash/rte_crc_arm64.h @@ -0,0 +1,215 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Cavium networks. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CRC_ARM64_H_ +#define _RTE_CRC_ARM64_H_ + +/** + * @file + * + * RTE CRC arm64 Hash + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +static inline uint32_t +crc32c_arm64_u8(uint8_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32cb %w[crc], %w[crc], %w[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +static inline uint32_t +crc32c_arm64_u16(uint16_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32ch %w[crc], %w[crc], %w[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +static inline uint32_t +crc32c_arm64_u32(uint32_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32cw %w[crc], %w[crc], %w[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +static inline uint32_t +crc32c_arm64_u64(uint64_t data, uint32_t init_val) +{ + asm(".arch armv8-a+crc"); + __asm__ volatile( + "crc32cx %w[crc], %w[crc], %x[value]" + : [crc] "+r" (init_val) + : [value] "r" (data)); + return init_val; +} + +/** + * Allow or disallow use of arm64 SIMD instrinsics for CRC32 hash + * calculation. + * + * @param alg + * An OR of following flags: + * - (CRC32_SW) Don't use arm64 crc intrinsics + * - (CRC32_ARM64) Use ARMv8 CRC intrinsic if available + * + */ +static inline void +rte_hash_crc_set_alg(uint8_t alg) +{ + switch (alg) { + case CRC32_ARM64: + if (!rte_cpu_get_flag_enabled(RTE_CPUFLAG_CRC32)) + alg = CRC32_SW; + case CRC32_SW: + crc32_alg = alg; + default: + break; + } +} + +/* Setting the best available algorithm */ +static inline void __attribute__((constructor)) +rte_hash_crc_init_alg(void) +{ + rte_hash_crc_set_alg(CRC32_ARM64); +} + +/** + * Use single crc32 instruction to perform a hash on a 1 byte value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_1byte(uint8_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_ARM64)) + return crc32c_arm64_u8(data, init_val); + + return crc32c_1byte(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 2 bytes value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_2byte(uint16_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_ARM64)) + return crc32c_arm64_u16(data, init_val); + + return crc32c_2bytes(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 4 byte value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_4byte(uint32_t data, uint32_t init_val) +{ + if (likely(crc32_alg & CRC32_ARM64)) + return crc32c_arm64_u32(data, init_val); + + return crc32c_1word(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 8 byte value. + * Fall back to software crc32 implementation in case arm64 crc intrinsics is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_8byte(uint64_t data, uint32_t init_val) +{ + if (likely(crc32_alg == CRC32_ARM64)) + return crc32c_arm64_u64(data, init_val); + + return crc32c_2words(data, init_val); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CRC_ARM64_H_ */ diff --git a/lib/librte_hash/rte_cuckoo_hash.c b/lib/librte_hash/rte_cuckoo_hash.c new file mode 100644 index 00000000..7b7d1f85 --- /dev/null +++ b/lib/librte_hash/rte_cuckoo_hash.c @@ -0,0 +1,1323 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include /* for definition of RTE_CACHE_LINE_SIZE */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_hash.h" +#if defined(RTE_ARCH_X86) +#include "rte_cmp_x86.h" +#endif + +#if defined(RTE_ARCH_ARM64) +#include "rte_cmp_arm64.h" +#endif + +TAILQ_HEAD(rte_hash_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_hash_tailq = { + .name = "RTE_HASH", +}; +EAL_REGISTER_TAILQ(rte_hash_tailq) + +/* Macro to enable/disable run-time checking of function parameters */ +#if defined(RTE_LIBRTE_HASH_DEBUG) +#define RETURN_IF_TRUE(cond, retval) do { \ + if (cond) \ + return retval; \ +} while (0) +#else +#define RETURN_IF_TRUE(cond, retval) +#endif + +/* Hash function used if none is specified */ +#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#include +#define DEFAULT_HASH_FUNC rte_hash_crc +#else +#include +#define DEFAULT_HASH_FUNC rte_jhash +#endif + +/** Number of items per bucket. */ +#define RTE_HASH_BUCKET_ENTRIES 4 + +#define NULL_SIGNATURE 0 + +#define KEY_ALIGNMENT 16 + +#define LCORE_CACHE_SIZE 8 + +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) +/* + * All different options to select a key compare function, + * based on the key size and custom function. + */ +enum cmp_jump_table_case { + KEY_CUSTOM = 0, + KEY_16_BYTES, + KEY_32_BYTES, + KEY_48_BYTES, + KEY_64_BYTES, + KEY_80_BYTES, + KEY_96_BYTES, + KEY_112_BYTES, + KEY_128_BYTES, + KEY_OTHER_BYTES, + NUM_KEY_CMP_CASES, +}; + +/* + * Table storing all different key compare functions + * (multi-process supported) + */ +const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { + NULL, + rte_hash_k16_cmp_eq, + rte_hash_k32_cmp_eq, + rte_hash_k48_cmp_eq, + rte_hash_k64_cmp_eq, + rte_hash_k80_cmp_eq, + rte_hash_k96_cmp_eq, + rte_hash_k112_cmp_eq, + rte_hash_k128_cmp_eq, + memcmp +}; +#else +/* + * All different options to select a key compare function, + * based on the key size and custom function. + */ +enum cmp_jump_table_case { + KEY_CUSTOM = 0, + KEY_OTHER_BYTES, + NUM_KEY_CMP_CASES, +}; + +/* + * Table storing all different key compare functions + * (multi-process supported) + */ +const rte_hash_cmp_eq_t cmp_jump_table[NUM_KEY_CMP_CASES] = { + NULL, + memcmp +}; + +#endif + +struct lcore_cache { + unsigned len; /**< Cache len */ + void *objs[LCORE_CACHE_SIZE]; /**< Cache objects */ +} __rte_cache_aligned; + +/** A hash table structure. */ +struct rte_hash { + char name[RTE_HASH_NAMESIZE]; /**< Name of the hash. */ + uint32_t entries; /**< Total table entries. */ + uint32_t num_buckets; /**< Number of buckets in table. */ + uint32_t key_len; /**< Length of hash key. */ + rte_hash_function hash_func; /**< Function used to calculate hash. */ + uint32_t hash_func_init_val; /**< Init value used by hash_func. */ + rte_hash_cmp_eq_t rte_hash_custom_cmp_eq; + /**< Custom function used to compare keys. */ + enum cmp_jump_table_case cmp_jump_table_idx; + /**< Indicates which compare function to use. */ + uint32_t bucket_bitmask; /**< Bitmask for getting bucket index + from hash signature. */ + uint32_t key_entry_size; /**< Size of each key entry. */ + + struct rte_ring *free_slots; /**< Ring that stores all indexes + of the free slots in the key table */ + void *key_store; /**< Table storing all keys and data */ + struct rte_hash_bucket *buckets; /**< Table with buckets storing all the + hash values and key indexes + to the key table*/ + uint8_t hw_trans_mem_support; /**< Hardware transactional + memory support */ + struct lcore_cache *local_free_slots; + /**< Local cache per lcore, storing some indexes of the free slots */ +} __rte_cache_aligned; + +/* Structure storing both primary and secondary hashes */ +struct rte_hash_signatures { + union { + struct { + hash_sig_t current; + hash_sig_t alt; + }; + uint64_t sig; + }; +}; + +/* Structure that stores key-value pair */ +struct rte_hash_key { + union { + uintptr_t idata; + void *pdata; + }; + /* Variable key size */ + char key[0]; +} __attribute__((aligned(KEY_ALIGNMENT))); + +/** Bucket structure */ +struct rte_hash_bucket { + struct rte_hash_signatures signatures[RTE_HASH_BUCKET_ENTRIES]; + /* Includes dummy key index that always contains index 0 */ + uint32_t key_idx[RTE_HASH_BUCKET_ENTRIES + 1]; + uint8_t flag[RTE_HASH_BUCKET_ENTRIES]; +} __rte_cache_aligned; + +struct rte_hash * +rte_hash_find_existing(const char *name) +{ + struct rte_hash *h = NULL; + struct rte_tailq_entry *te; + struct rte_hash_list *hash_list; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, hash_list, next) { + h = (struct rte_hash *) te->data; + if (strncmp(name, h->name, RTE_HASH_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return h; +} + +void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func) +{ + h->rte_hash_custom_cmp_eq = func; +} + +static inline int +rte_hash_cmp_eq(const void *key1, const void *key2, const struct rte_hash *h) +{ + if (h->cmp_jump_table_idx == KEY_CUSTOM) + return h->rte_hash_custom_cmp_eq(key1, key2, h->key_len); + else + return cmp_jump_table[h->cmp_jump_table_idx](key1, key2, h->key_len); +} + +struct rte_hash * +rte_hash_create(const struct rte_hash_parameters *params) +{ + struct rte_hash *h = NULL; + struct rte_tailq_entry *te = NULL; + struct rte_hash_list *hash_list; + struct rte_ring *r = NULL; + char hash_name[RTE_HASH_NAMESIZE]; + void *k = NULL; + void *buckets = NULL; + char ring_name[RTE_RING_NAMESIZE]; + unsigned num_key_slots; + unsigned hw_trans_mem_support = 0; + unsigned i; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + if (params == NULL) { + RTE_LOG(ERR, HASH, "rte_hash_create has no parameters\n"); + return NULL; + } + + /* Check for valid parameters */ + if ((params->entries > RTE_HASH_ENTRIES_MAX) || + (params->entries < RTE_HASH_BUCKET_ENTRIES) || + !rte_is_power_of_2(RTE_HASH_BUCKET_ENTRIES) || + (params->key_len == 0)) { + rte_errno = EINVAL; + RTE_LOG(ERR, HASH, "rte_hash_create has invalid parameters\n"); + return NULL; + } + + /* Check extra flags field to check extra options. */ + if (params->extra_flag & RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT) + hw_trans_mem_support = 1; + + /* Store all keys and leave the first entry as a dummy entry for lookup_bulk */ + if (hw_trans_mem_support) + /* + * Increase number of slots by total number of indices + * that can be stored in the lcore caches + * except for the first cache + */ + num_key_slots = params->entries + (RTE_MAX_LCORE - 1) * + LCORE_CACHE_SIZE + 1; + else + num_key_slots = params->entries + 1; + + snprintf(ring_name, sizeof(ring_name), "HT_%s", params->name); + r = rte_ring_create(ring_name, rte_align32pow2(num_key_slots), + params->socket_id, 0); + if (r == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err; + } + + snprintf(hash_name, sizeof(hash_name), "HT_%s", params->name); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing: this is normally already checked + * by ring creation above */ + TAILQ_FOREACH(te, hash_list, next) { + h = (struct rte_hash *) te->data; + if (strncmp(params->name, h->name, RTE_HASH_NAMESIZE) == 0) + break; + } + h = NULL; + if (te != NULL) { + rte_errno = EEXIST; + te = NULL; + goto err_unlock; + } + + te = rte_zmalloc("HASH_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, HASH, "tailq entry allocation failed\n"); + goto err_unlock; + } + + h = (struct rte_hash *)rte_zmalloc_socket(hash_name, sizeof(struct rte_hash), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (h == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err_unlock; + } + + const uint32_t num_buckets = rte_align32pow2(params->entries) + / RTE_HASH_BUCKET_ENTRIES; + + buckets = rte_zmalloc_socket(NULL, + num_buckets * sizeof(struct rte_hash_bucket), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (buckets == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err_unlock; + } + + const uint32_t key_entry_size = sizeof(struct rte_hash_key) + params->key_len; + const uint64_t key_tbl_size = (uint64_t) key_entry_size * num_key_slots; + + k = rte_zmalloc_socket(NULL, key_tbl_size, + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (k == NULL) { + RTE_LOG(ERR, HASH, "memory allocation failed\n"); + goto err_unlock; + } + +/* + * If x86 architecture is used, select appropriate compare function, + * which may use x86 instrinsics, otherwise use memcmp + */ +#if defined(RTE_ARCH_X86) || defined(RTE_ARCH_ARM64) + /* Select function to compare keys */ + switch (params->key_len) { + case 16: + h->cmp_jump_table_idx = KEY_16_BYTES; + break; + case 32: + h->cmp_jump_table_idx = KEY_32_BYTES; + break; + case 48: + h->cmp_jump_table_idx = KEY_48_BYTES; + break; + case 64: + h->cmp_jump_table_idx = KEY_64_BYTES; + break; + case 80: + h->cmp_jump_table_idx = KEY_80_BYTES; + break; + case 96: + h->cmp_jump_table_idx = KEY_96_BYTES; + break; + case 112: + h->cmp_jump_table_idx = KEY_112_BYTES; + break; + case 128: + h->cmp_jump_table_idx = KEY_128_BYTES; + break; + default: + /* If key is not multiple of 16, use generic memcmp */ + h->cmp_jump_table_idx = KEY_OTHER_BYTES; + } +#else + h->cmp_jump_table_idx = KEY_OTHER_BYTES; +#endif + + if (hw_trans_mem_support) { + h->local_free_slots = rte_zmalloc_socket(NULL, + sizeof(struct lcore_cache) * RTE_MAX_LCORE, + RTE_CACHE_LINE_SIZE, params->socket_id); + } + + /* Setup hash context */ + snprintf(h->name, sizeof(h->name), "%s", params->name); + h->entries = params->entries; + h->key_len = params->key_len; + h->key_entry_size = key_entry_size; + h->hash_func_init_val = params->hash_func_init_val; + + h->num_buckets = num_buckets; + h->bucket_bitmask = h->num_buckets - 1; + h->buckets = buckets; + h->hash_func = (params->hash_func == NULL) ? + DEFAULT_HASH_FUNC : params->hash_func; + h->key_store = k; + h->free_slots = r; + h->hw_trans_mem_support = hw_trans_mem_support; + + /* populate the free slots ring. Entry zero is reserved for key misses */ + for (i = 1; i < params->entries + 1; i++) + rte_ring_sp_enqueue(r, (void *)((uintptr_t) i)); + + te->data = (void *) h; + TAILQ_INSERT_TAIL(hash_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return h; +err_unlock: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); +err: + rte_ring_free(r); + rte_free(te); + rte_free(h); + rte_free(buckets); + rte_free(k); + return NULL; +} + +void +rte_hash_free(struct rte_hash *h) +{ + struct rte_tailq_entry *te; + struct rte_hash_list *hash_list; + + if (h == NULL) + return; + + hash_list = RTE_TAILQ_CAST(rte_hash_tailq.head, rte_hash_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, hash_list, next) { + if (te->data == (void *) h) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(hash_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (h->hw_trans_mem_support) + rte_free(h->local_free_slots); + + rte_ring_free(h->free_slots); + rte_free(h->key_store); + rte_free(h->buckets); + rte_free(h); + rte_free(te); +} + +hash_sig_t +rte_hash_hash(const struct rte_hash *h, const void *key) +{ + /* calc hash result by key */ + return h->hash_func(key, h->key_len, h->hash_func_init_val); +} + +/* Calc the secondary hash value from the primary hash value of a given key */ +static inline hash_sig_t +rte_hash_secondary_hash(const hash_sig_t primary_hash) +{ + static const unsigned all_bits_shift = 12; + static const unsigned alt_bits_xor = 0x5bd1e995; + + uint32_t tag = primary_hash >> all_bits_shift; + + return primary_hash ^ ((tag + 1) * alt_bits_xor); +} + +void +rte_hash_reset(struct rte_hash *h) +{ + void *ptr; + unsigned i; + + if (h == NULL) + return; + + memset(h->buckets, 0, h->num_buckets * sizeof(struct rte_hash_bucket)); + memset(h->key_store, 0, h->key_entry_size * (h->entries + 1)); + + /* clear the free ring */ + while (rte_ring_dequeue(h->free_slots, &ptr) == 0) + rte_pause(); + + /* Repopulate the free slots ring. Entry zero is reserved for key misses */ + for (i = 1; i < h->entries + 1; i++) + rte_ring_sp_enqueue(h->free_slots, (void *)((uintptr_t) i)); + + if (h->hw_trans_mem_support) { + /* Reset local caches per lcore */ + for (i = 0; i < RTE_MAX_LCORE; i++) + h->local_free_slots[i].len = 0; + } +} + +/* Search for an entry that can be pushed to its alternative location */ +static inline int +make_space_bucket(const struct rte_hash *h, struct rte_hash_bucket *bkt) +{ + unsigned i, j; + int ret; + uint32_t next_bucket_idx; + struct rte_hash_bucket *next_bkt[RTE_HASH_BUCKET_ENTRIES]; + + /* + * Push existing item (search for bucket with space in + * alternative locations) to its alternative location + */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Search for space in alternative locations */ + next_bucket_idx = bkt->signatures[i].alt & h->bucket_bitmask; + next_bkt[i] = &h->buckets[next_bucket_idx]; + for (j = 0; j < RTE_HASH_BUCKET_ENTRIES; j++) { + if (next_bkt[i]->signatures[j].sig == NULL_SIGNATURE) + break; + } + + if (j != RTE_HASH_BUCKET_ENTRIES) + break; + } + + /* Alternative location has spare room (end of recursive function) */ + if (i != RTE_HASH_BUCKET_ENTRIES) { + next_bkt[i]->signatures[j].alt = bkt->signatures[i].current; + next_bkt[i]->signatures[j].current = bkt->signatures[i].alt; + next_bkt[i]->key_idx[j] = bkt->key_idx[i]; + return i; + } + + /* Pick entry that has not been pushed yet */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) + if (bkt->flag[i] == 0) + break; + + /* All entries have been pushed, so entry cannot be added */ + if (i == RTE_HASH_BUCKET_ENTRIES) + return -ENOSPC; + + /* Set flag to indicate that this entry is going to be pushed */ + bkt->flag[i] = 1; + /* Need room in alternative bucket to insert the pushed entry */ + ret = make_space_bucket(h, next_bkt[i]); + /* + * After recursive function. + * Clear flags and insert the pushed entry + * in its alternative location if successful, + * or return error + */ + bkt->flag[i] = 0; + if (ret >= 0) { + next_bkt[i]->signatures[ret].alt = bkt->signatures[i].current; + next_bkt[i]->signatures[ret].current = bkt->signatures[i].alt; + next_bkt[i]->key_idx[ret] = bkt->key_idx[i]; + return i; + } else + return ret; + +} + +/* + * Function called to enqueue back an index in the cache/ring, + * as slot has not being used and it can be used in the + * next addition attempt. + */ +static inline void +enqueue_slot_back(const struct rte_hash *h, + struct lcore_cache *cached_free_slots, + void *slot_id) +{ + if (h->hw_trans_mem_support) { + cached_free_slots->objs[cached_free_slots->len] = slot_id; + cached_free_slots->len++; + } else + rte_ring_sp_enqueue(h->free_slots, slot_id); +} + +static inline int32_t +__rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void *data) +{ + hash_sig_t alt_hash; + uint32_t prim_bucket_idx, sec_bucket_idx; + unsigned i; + struct rte_hash_bucket *prim_bkt, *sec_bkt; + struct rte_hash_key *new_k, *k, *keys = h->key_store; + void *slot_id = NULL; + uint32_t new_idx; + int ret; + unsigned n_slots; + unsigned lcore_id; + struct lcore_cache *cached_free_slots = NULL; + + prim_bucket_idx = sig & h->bucket_bitmask; + prim_bkt = &h->buckets[prim_bucket_idx]; + rte_prefetch0(prim_bkt); + + alt_hash = rte_hash_secondary_hash(sig); + sec_bucket_idx = alt_hash & h->bucket_bitmask; + sec_bkt = &h->buckets[sec_bucket_idx]; + rte_prefetch0(sec_bkt); + + /* Get a new slot for storing the new key */ + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Try to get a free slot from the local cache */ + if (cached_free_slots->len == 0) { + /* Need to get another burst of free slots from global ring */ + n_slots = rte_ring_mc_dequeue_burst(h->free_slots, + cached_free_slots->objs, LCORE_CACHE_SIZE); + if (n_slots == 0) + return -ENOSPC; + + cached_free_slots->len += n_slots; + } + + /* Get a free slot from the local cache */ + cached_free_slots->len--; + slot_id = cached_free_slots->objs[cached_free_slots->len]; + } else { + if (rte_ring_sc_dequeue(h->free_slots, &slot_id) != 0) + return -ENOSPC; + } + + new_k = RTE_PTR_ADD(keys, (uintptr_t)slot_id * h->key_entry_size); + rte_prefetch0(new_k); + new_idx = (uint32_t)((uintptr_t) slot_id); + + /* Check if key is already inserted in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (prim_bkt->signatures[i].current == sig && + prim_bkt->signatures[i].alt == alt_hash) { + k = (struct rte_hash_key *) ((char *)keys + + prim_bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + /* Enqueue index of free slot back in the ring. */ + enqueue_slot_back(h, cached_free_slots, slot_id); + /* Update data */ + k->pdata = data; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return prim_bkt->key_idx[i] - 1; + } + } + } + + /* Check if key is already inserted in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (sec_bkt->signatures[i].alt == sig && + sec_bkt->signatures[i].current == alt_hash) { + k = (struct rte_hash_key *) ((char *)keys + + sec_bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + /* Enqueue index of free slot back in the ring. */ + enqueue_slot_back(h, cached_free_slots, slot_id); + /* Update data */ + k->pdata = data; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return sec_bkt->key_idx[i] - 1; + } + } + } + + /* Copy key */ + rte_memcpy(new_k->key, key, h->key_len); + new_k->pdata = data; + + /* Insert new entry is there is room in the primary bucket */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + /* Check if slot is available */ + if (likely(prim_bkt->signatures[i].sig == NULL_SIGNATURE)) { + prim_bkt->signatures[i].current = sig; + prim_bkt->signatures[i].alt = alt_hash; + prim_bkt->key_idx[i] = new_idx; + return new_idx - 1; + } + } + + /* Primary bucket is full, so we need to make space for new entry */ + ret = make_space_bucket(h, prim_bkt); + /* + * After recursive function. + * Insert the new entry in the position of the pushed entry + * if successful or return error and + * store the new slot back in the ring + */ + if (ret >= 0) { + prim_bkt->signatures[ret].current = sig; + prim_bkt->signatures[ret].alt = alt_hash; + prim_bkt->key_idx[ret] = new_idx; + return new_idx - 1; + } + + /* Error in addition, store new slot back in the ring and return error */ + enqueue_slot_back(h, cached_free_slots, (void *)((uintptr_t) new_idx)); + + return ret; +} + +int32_t +rte_hash_add_key_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_add_key_with_hash(h, key, sig, 0); +} + +int32_t +rte_hash_add_key(const struct rte_hash *h, const void *key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), 0); +} + +int +rte_hash_add_key_with_hash_data(const struct rte_hash *h, + const void *key, hash_sig_t sig, void *data) +{ + int ret; + + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + ret = __rte_hash_add_key_with_hash(h, key, sig, data); + if (ret >= 0) + return 0; + else + return ret; +} + +int +rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data) +{ + int ret; + + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + + ret = __rte_hash_add_key_with_hash(h, key, rte_hash_hash(h, key), data); + if (ret >= 0) + return 0; + else + return ret; +} +static inline int32_t +__rte_hash_lookup_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + unsigned i; + struct rte_hash_bucket *bkt; + struct rte_hash_key *k, *keys = h->key_store; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == sig && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + if (data != NULL) + *data = k->pdata; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } + } + } + + /* Calculate secondary hash */ + alt_hash = rte_hash_secondary_hash(sig); + bucket_idx = alt_hash & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == alt_hash && + bkt->signatures[i].alt == sig) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + if (data != NULL) + *data = k->pdata; + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } + } + } + + return -ENOENT; +} + +int32_t +rte_hash_lookup_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, sig, NULL); +} + +int32_t +rte_hash_lookup(const struct rte_hash *h, const void *key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), NULL); +} + +int +rte_hash_lookup_with_hash_data(const struct rte_hash *h, + const void *key, hash_sig_t sig, void **data) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, sig, data); +} + +int +rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_lookup_with_hash(h, key, rte_hash_hash(h, key), data); +} + +static inline void +remove_entry(const struct rte_hash *h, struct rte_hash_bucket *bkt, unsigned i) +{ + unsigned lcore_id, n_slots; + struct lcore_cache *cached_free_slots; + + bkt->signatures[i].sig = NULL_SIGNATURE; + if (h->hw_trans_mem_support) { + lcore_id = rte_lcore_id(); + cached_free_slots = &h->local_free_slots[lcore_id]; + /* Cache full, need to free it. */ + if (cached_free_slots->len == LCORE_CACHE_SIZE) { + /* Need to enqueue the free slots in global ring. */ + n_slots = rte_ring_mp_enqueue_burst(h->free_slots, + cached_free_slots->objs, + LCORE_CACHE_SIZE); + cached_free_slots->len -= n_slots; + } + /* Put index of new free slot in cache. */ + cached_free_slots->objs[cached_free_slots->len] = + (void *)((uintptr_t)bkt->key_idx[i]); + cached_free_slots->len++; + } else { + rte_ring_sp_enqueue(h->free_slots, + (void *)((uintptr_t)bkt->key_idx[i])); + } +} + +static inline int32_t +__rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, + hash_sig_t sig) +{ + uint32_t bucket_idx; + hash_sig_t alt_hash; + unsigned i; + struct rte_hash_bucket *bkt; + struct rte_hash_key *k, *keys = h->key_store; + + bucket_idx = sig & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in primary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == sig && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + remove_entry(h, bkt, i); + + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } + } + } + + /* Calculate secondary hash */ + alt_hash = rte_hash_secondary_hash(sig); + bucket_idx = alt_hash & h->bucket_bitmask; + bkt = &h->buckets[bucket_idx]; + + /* Check if key is in secondary location */ + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + if (bkt->signatures[i].current == alt_hash && + bkt->signatures[i].sig != NULL_SIGNATURE) { + k = (struct rte_hash_key *) ((char *)keys + + bkt->key_idx[i] * h->key_entry_size); + if (rte_hash_cmp_eq(key, k->key, h) == 0) { + remove_entry(h, bkt, i); + + /* + * Return index where key is stored, + * substracting the first dummy index + */ + return bkt->key_idx[i] - 1; + } + } + } + + return -ENOENT; +} + +int32_t +rte_hash_del_key_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_del_key_with_hash(h, key, sig); +} + +int32_t +rte_hash_del_key(const struct rte_hash *h, const void *key) +{ + RETURN_IF_TRUE(((h == NULL) || (key == NULL)), -EINVAL); + return __rte_hash_del_key_with_hash(h, key, rte_hash_hash(h, key)); +} + +/* Lookup bulk stage 0: Prefetch input key */ +static inline void +lookup_stage0(unsigned *idx, uint64_t *lookup_mask, + const void * const *keys) +{ + *idx = __builtin_ctzl(*lookup_mask); + if (*lookup_mask == 0) + *idx = 0; + + rte_prefetch0(keys[*idx]); + *lookup_mask &= ~(1llu << *idx); +} + +/* + * Lookup bulk stage 1: Calculate primary/secondary hashes + * and prefetch primary/secondary buckets + */ +static inline void +lookup_stage1(unsigned idx, hash_sig_t *prim_hash, hash_sig_t *sec_hash, + const struct rte_hash_bucket **primary_bkt, + const struct rte_hash_bucket **secondary_bkt, + hash_sig_t *hash_vals, const void * const *keys, + const struct rte_hash *h) +{ + *prim_hash = rte_hash_hash(h, keys[idx]); + hash_vals[idx] = *prim_hash; + *sec_hash = rte_hash_secondary_hash(*prim_hash); + + *primary_bkt = &h->buckets[*prim_hash & h->bucket_bitmask]; + *secondary_bkt = &h->buckets[*sec_hash & h->bucket_bitmask]; + + rte_prefetch0(*primary_bkt); + rte_prefetch0(*secondary_bkt); +} + +/* + * Lookup bulk stage 2: Search for match hashes in primary/secondary locations + * and prefetch first key slot + */ +static inline void +lookup_stage2(unsigned idx, hash_sig_t prim_hash, hash_sig_t sec_hash, + const struct rte_hash_bucket *prim_bkt, + const struct rte_hash_bucket *sec_bkt, + const struct rte_hash_key **key_slot, int32_t *positions, + uint64_t *extra_hits_mask, const void *keys, + const struct rte_hash *h) +{ + unsigned prim_hash_matches, sec_hash_matches, key_idx, i; + unsigned total_hash_matches; + + prim_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; + sec_hash_matches = 1 << RTE_HASH_BUCKET_ENTRIES; + for (i = 0; i < RTE_HASH_BUCKET_ENTRIES; i++) { + prim_hash_matches |= ((prim_hash == prim_bkt->signatures[i].current) << i); + sec_hash_matches |= ((sec_hash == sec_bkt->signatures[i].current) << i); + } + + key_idx = prim_bkt->key_idx[__builtin_ctzl(prim_hash_matches)]; + if (key_idx == 0) + key_idx = sec_bkt->key_idx[__builtin_ctzl(sec_hash_matches)]; + + total_hash_matches = (prim_hash_matches | + (sec_hash_matches << (RTE_HASH_BUCKET_ENTRIES + 1))); + *key_slot = (const struct rte_hash_key *) ((const char *)keys + + key_idx * h->key_entry_size); + + rte_prefetch0(*key_slot); + /* + * Return index where key is stored, + * substracting the first dummy index + */ + positions[idx] = (key_idx - 1); + + *extra_hits_mask |= (uint64_t)(__builtin_popcount(total_hash_matches) > 3) << idx; + +} + + +/* Lookup bulk stage 3: Check if key matches, update hit mask and return data */ +static inline void +lookup_stage3(unsigned idx, const struct rte_hash_key *key_slot, const void * const *keys, + const int32_t *positions, void *data[], uint64_t *hits, + const struct rte_hash *h) +{ + unsigned hit; + unsigned key_idx; + + hit = !rte_hash_cmp_eq(key_slot->key, keys[idx], h); + if (data != NULL) + data[idx] = key_slot->pdata; + + key_idx = positions[idx] + 1; + /* + * If key index is 0, force hit to be 0, in case key to be looked up + * is all zero (as in the dummy slot), which would result in a wrong hit + */ + *hits |= (uint64_t)(hit && !!key_idx) << idx; +} + +static inline void +__rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t num_keys, int32_t *positions, + uint64_t *hit_mask, void *data[]) +{ + uint64_t hits = 0; + uint64_t extra_hits_mask = 0; + uint64_t lookup_mask, miss_mask; + unsigned idx; + const void *key_store = h->key_store; + int ret; + hash_sig_t hash_vals[RTE_HASH_LOOKUP_BULK_MAX]; + + unsigned idx00, idx01, idx10, idx11, idx20, idx21, idx30, idx31; + const struct rte_hash_bucket *primary_bkt10, *primary_bkt11; + const struct rte_hash_bucket *secondary_bkt10, *secondary_bkt11; + const struct rte_hash_bucket *primary_bkt20, *primary_bkt21; + const struct rte_hash_bucket *secondary_bkt20, *secondary_bkt21; + const struct rte_hash_key *k_slot20, *k_slot21, *k_slot30, *k_slot31; + hash_sig_t primary_hash10, primary_hash11; + hash_sig_t secondary_hash10, secondary_hash11; + hash_sig_t primary_hash20, primary_hash21; + hash_sig_t secondary_hash20, secondary_hash21; + + lookup_mask = (uint64_t) -1 >> (64 - num_keys); + miss_mask = lookup_mask; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + + while (lookup_mask) { + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage0(&idx00, &lookup_mask, keys); + lookup_stage0(&idx01, &lookup_mask, keys); + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, + primary_bkt20, secondary_bkt20, &k_slot20, positions, + &extra_hits_mask, key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, + primary_bkt21, secondary_bkt21, &k_slot21, positions, + &extra_hits_mask, key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + } + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + idx10 = idx00, idx11 = idx01; + + lookup_stage1(idx10, &primary_hash10, &secondary_hash10, + &primary_bkt10, &secondary_bkt10, hash_vals, keys, h); + lookup_stage1(idx11, &primary_hash11, &secondary_hash11, + &primary_bkt11, &secondary_bkt11, hash_vals, keys, h); + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + primary_bkt20 = primary_bkt10; + primary_bkt21 = primary_bkt11; + secondary_bkt20 = secondary_bkt10; + secondary_bkt21 = secondary_bkt11; + primary_hash20 = primary_hash10; + primary_hash21 = primary_hash11; + secondary_hash20 = secondary_hash10; + secondary_hash21 = secondary_hash11; + idx20 = idx10, idx21 = idx11; + + lookup_stage2(idx20, primary_hash20, secondary_hash20, primary_bkt20, + secondary_bkt20, &k_slot20, positions, &extra_hits_mask, + key_store, h); + lookup_stage2(idx21, primary_hash21, secondary_hash21, primary_bkt21, + secondary_bkt21, &k_slot21, positions, &extra_hits_mask, + key_store, h); + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + k_slot30 = k_slot20, k_slot31 = k_slot21; + idx30 = idx20, idx31 = idx21; + + lookup_stage3(idx30, k_slot30, keys, positions, data, &hits, h); + lookup_stage3(idx31, k_slot31, keys, positions, data, &hits, h); + + /* ignore any items we have already found */ + extra_hits_mask &= ~hits; + + if (unlikely(extra_hits_mask)) { + /* run a single search for each remaining item */ + do { + idx = __builtin_ctzl(extra_hits_mask); + if (data != NULL) { + ret = rte_hash_lookup_with_hash_data(h, + keys[idx], hash_vals[idx], &data[idx]); + if (ret >= 0) + hits |= 1ULL << idx; + } else { + positions[idx] = rte_hash_lookup_with_hash(h, + keys[idx], hash_vals[idx]); + if (positions[idx] >= 0) + hits |= 1llu << idx; + } + extra_hits_mask &= ~(1llu << idx); + } while (extra_hits_mask); + } + + miss_mask &= ~hits; + if (unlikely(miss_mask)) { + do { + idx = __builtin_ctzl(miss_mask); + positions[idx] = -ENOENT; + miss_mask &= ~(1llu << idx); + } while (miss_mask); + } + + if (hit_mask != NULL) + *hit_mask = hits; +} + +int +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t num_keys, int32_t *positions) +{ + RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) || + (num_keys > RTE_HASH_LOOKUP_BULK_MAX) || + (positions == NULL)), -EINVAL); + + __rte_hash_lookup_bulk(h, keys, num_keys, positions, NULL, NULL); + return 0; +} + +int +rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys, + uint32_t num_keys, uint64_t *hit_mask, void *data[]) +{ + RETURN_IF_TRUE(((h == NULL) || (keys == NULL) || (num_keys == 0) || + (num_keys > RTE_HASH_LOOKUP_BULK_MAX) || + (hit_mask == NULL)), -EINVAL); + + int32_t positions[num_keys]; + + __rte_hash_lookup_bulk(h, keys, num_keys, positions, hit_mask, data); + + /* Return number of hits */ + return __builtin_popcountl(*hit_mask); +} + +int32_t +rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next) +{ + uint32_t bucket_idx, idx, position; + struct rte_hash_key *next_key; + + RETURN_IF_TRUE(((h == NULL) || (next == NULL)), -EINVAL); + + const uint32_t total_entries = h->num_buckets * RTE_HASH_BUCKET_ENTRIES; + /* Out of bounds */ + if (*next >= total_entries) + return -ENOENT; + + /* Calculate bucket and index of current iterator */ + bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; + idx = *next % RTE_HASH_BUCKET_ENTRIES; + + /* If current position is empty, go to the next one */ + while (h->buckets[bucket_idx].signatures[idx].sig == NULL_SIGNATURE) { + (*next)++; + /* End of table */ + if (*next == total_entries) + return -ENOENT; + bucket_idx = *next / RTE_HASH_BUCKET_ENTRIES; + idx = *next % RTE_HASH_BUCKET_ENTRIES; + } + + /* Get position of entry in key table */ + position = h->buckets[bucket_idx].key_idx[idx]; + next_key = (struct rte_hash_key *) ((char *)h->key_store + + position * h->key_entry_size); + /* Return key and data */ + *key = next_key->key; + *data = next_key->pdata; + + /* Increment iterator */ + (*next)++; + + return position - 1; +} diff --git a/lib/librte_hash/rte_fbk_hash.c b/lib/librte_hash/rte_fbk_hash.c new file mode 100644 index 00000000..55c9f358 --- /dev/null +++ b/lib/librte_hash/rte_fbk_hash.c @@ -0,0 +1,231 @@ +/** + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_fbk_hash.h" + +TAILQ_HEAD(rte_fbk_hash_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_fbk_hash_tailq = { + .name = "RTE_FBK_HASH", +}; +EAL_REGISTER_TAILQ(rte_fbk_hash_tailq) + +/** + * Performs a lookup for an existing hash table, and returns a pointer to + * the table if found. + * + * @param name + * Name of the hash table to find + * + * @return + * pointer to hash table structure or NULL on error. + */ +struct rte_fbk_hash_table * +rte_fbk_hash_find_existing(const char *name) +{ + struct rte_fbk_hash_table *h = NULL; + struct rte_tailq_entry *te; + struct rte_fbk_hash_list *fbk_hash_list; + + fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head, + rte_fbk_hash_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, fbk_hash_list, next) { + h = (struct rte_fbk_hash_table *) te->data; + if (strncmp(name, h->name, RTE_FBK_HASH_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + return h; +} + +/** + * Create a new hash table for use with four byte keys. + * + * @param params + * Parameters used in creation of hash table. + * + * @return + * Pointer to hash table structure that is used in future hash table + * operations, or NULL on error. + */ +struct rte_fbk_hash_table * +rte_fbk_hash_create(const struct rte_fbk_hash_params *params) +{ + struct rte_fbk_hash_table *ht = NULL; + struct rte_tailq_entry *te; + char hash_name[RTE_FBK_HASH_NAMESIZE]; + const uint32_t mem_size = + sizeof(*ht) + (sizeof(ht->t[0]) * params->entries); + uint32_t i; + struct rte_fbk_hash_list *fbk_hash_list; + + fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head, + rte_fbk_hash_list); + + /* Error checking of parameters. */ + if ((!rte_is_power_of_2(params->entries)) || + (!rte_is_power_of_2(params->entries_per_bucket)) || + (params->entries == 0) || + (params->entries_per_bucket == 0) || + (params->entries_per_bucket > params->entries) || + (params->entries > RTE_FBK_HASH_ENTRIES_MAX) || + (params->entries_per_bucket > RTE_FBK_HASH_ENTRIES_PER_BUCKET_MAX)){ + rte_errno = EINVAL; + return NULL; + } + + snprintf(hash_name, sizeof(hash_name), "FBK_%s", params->name); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, fbk_hash_list, next) { + ht = (struct rte_fbk_hash_table *) te->data; + if (strncmp(params->name, ht->name, RTE_FBK_HASH_NAMESIZE) == 0) + break; + } + ht = NULL; + if (te != NULL) { + rte_errno = EEXIST; + goto exit; + } + + te = rte_zmalloc("FBK_HASH_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, HASH, "Failed to allocate tailq entry\n"); + goto exit; + } + + /* Allocate memory for table. */ + ht = (struct rte_fbk_hash_table *)rte_zmalloc_socket(hash_name, mem_size, + 0, params->socket_id); + if (ht == NULL) { + RTE_LOG(ERR, HASH, "Failed to allocate fbk hash table\n"); + rte_free(te); + goto exit; + } + + /* Set up hash table context. */ + snprintf(ht->name, sizeof(ht->name), "%s", params->name); + ht->entries = params->entries; + ht->entries_per_bucket = params->entries_per_bucket; + ht->used_entries = 0; + ht->bucket_mask = (params->entries / params->entries_per_bucket) - 1; + for (ht->bucket_shift = 0, i = 1; + (params->entries_per_bucket & i) == 0; + ht->bucket_shift++, i <<= 1) + ; /* empty loop body */ + + if (params->hash_func != NULL) { + ht->hash_func = params->hash_func; + ht->init_val = params->init_val; + } + else { + ht->hash_func = RTE_FBK_HASH_FUNC_DEFAULT; + ht->init_val = RTE_FBK_HASH_INIT_VAL_DEFAULT; + } + + te->data = (void *) ht; + + TAILQ_INSERT_TAIL(fbk_hash_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return ht; +} + +/** + * Free all memory used by a hash table. + * + * @param ht + * Hash table to deallocate. + */ +void +rte_fbk_hash_free(struct rte_fbk_hash_table *ht) +{ + struct rte_tailq_entry *te; + struct rte_fbk_hash_list *fbk_hash_list; + + if (ht == NULL) + return; + + fbk_hash_list = RTE_TAILQ_CAST(rte_fbk_hash_tailq.head, + rte_fbk_hash_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, fbk_hash_list, next) { + if (te->data == (void *) ht) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(fbk_hash_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(ht); + rte_free(te); +} diff --git a/lib/librte_hash/rte_fbk_hash.h b/lib/librte_hash/rte_fbk_hash.h new file mode 100644 index 00000000..a430961d --- /dev/null +++ b/lib/librte_hash/rte_fbk_hash.h @@ -0,0 +1,396 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_FBK_HASH_H_ +#define _RTE_FBK_HASH_H_ + +/** + * @file + * + * This is a hash table implementation for four byte keys (fbk). + * + * Note that the return value of the add function should always be checked as, + * if a bucket is full, the key is not added even if there is space in other + * buckets. This keeps the lookup function very simple and therefore fast. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifndef RTE_FBK_HASH_FUNC_DEFAULT +#if defined(RTE_MACHINE_CPUFLAG_SSE4_2) || defined(RTE_MACHINE_CPUFLAG_CRC32) +#include +/** Default four-byte key hash function if none is specified. */ +#define RTE_FBK_HASH_FUNC_DEFAULT rte_hash_crc_4byte +#else +#include +#define RTE_FBK_HASH_FUNC_DEFAULT rte_jhash_1word +#endif +#endif + +#ifndef RTE_FBK_HASH_INIT_VAL_DEFAULT +/** Initialising value used when calculating hash. */ +#define RTE_FBK_HASH_INIT_VAL_DEFAULT 0xFFFFFFFF +#endif + +/** The maximum number of entries in the hash table that is supported. */ +#define RTE_FBK_HASH_ENTRIES_MAX (1 << 20) + +/** The maximum number of entries in each bucket that is supported. */ +#define RTE_FBK_HASH_ENTRIES_PER_BUCKET_MAX 256 + +/** Maximum size of string for naming the hash. */ +#define RTE_FBK_HASH_NAMESIZE 32 + +/** Type of function that can be used for calculating the hash value. */ +typedef uint32_t (*rte_fbk_hash_fn)(uint32_t key, uint32_t init_val); + +/** Parameters used when creating four-byte key hash table. */ +struct rte_fbk_hash_params { + const char *name; /**< Name of the hash table. */ + uint32_t entries; /**< Total number of entries. */ + uint32_t entries_per_bucket; /**< Number of entries in a bucket. */ + int socket_id; /**< Socket to allocate memory on. */ + rte_fbk_hash_fn hash_func; /**< The hash function. */ + uint32_t init_val; /**< For initialising hash function. */ +}; + +/** Individual entry in the four-byte key hash table. */ +union rte_fbk_hash_entry { + uint64_t whole_entry; /**< For accessing entire entry. */ + struct { + uint16_t is_entry; /**< Non-zero if entry is active. */ + uint16_t value; /**< Value returned by lookup. */ + uint32_t key; /**< Key used to find value. */ + } entry; /**< For accessing each entry part. */ +}; + + +/** The four-byte key hash table structure. */ +struct rte_fbk_hash_table { + char name[RTE_FBK_HASH_NAMESIZE]; /**< Name of the hash. */ + uint32_t entries; /**< Total number of entries. */ + uint32_t entries_per_bucket; /**< Number of entries in a bucket. */ + uint32_t used_entries; /**< How many entries are used. */ + uint32_t bucket_mask; /**< To find which bucket the key is in. */ + uint32_t bucket_shift; /**< Convert bucket to table offset. */ + rte_fbk_hash_fn hash_func; /**< The hash function. */ + uint32_t init_val; /**< For initialising hash function. */ + + /** A flat table of all buckets. */ + union rte_fbk_hash_entry t[0]; +}; + +/** + * Find the offset into hash table of the bucket containing a particular key. + * + * @param ht + * Pointer to hash table. + * @param key + * Key to calculate bucket for. + * @return + * Offset into hash table. + */ +static inline uint32_t +rte_fbk_hash_get_bucket(const struct rte_fbk_hash_table *ht, uint32_t key) +{ + return (ht->hash_func(key, ht->init_val) & ht->bucket_mask) << + ht->bucket_shift; +} + +/** + * Add a key to an existing hash table with bucket id. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param ht + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param value + * Value to associate with key. + * @param bucket + * Bucket to associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_add_key_with_bucket(struct rte_fbk_hash_table *ht, + uint32_t key, uint16_t value, uint32_t bucket) +{ + /* + * The writing of a new value to the hash table is done as a single + * 64bit operation. This should help prevent individual entries being + * corrupted due to race conditions, but it's still possible to + * overwrite entries that have just been made valid. + */ + const uint64_t new_entry = ((uint64_t)(key) << 32) | + ((uint64_t)(value) << 16) | + 1; /* 1 = is_entry bit. */ + uint32_t i; + + for (i = 0; i < ht->entries_per_bucket; i++) { + /* Set entry if unused. */ + if (! ht->t[bucket + i].entry.is_entry) { + ht->t[bucket + i].whole_entry = new_entry; + ht->used_entries++; + return 0; + } + /* Change value if key already exists. */ + if (ht->t[bucket + i].entry.key == key) { + ht->t[bucket + i].entry.value = value; + return 0; + } + } + + return -ENOSPC; /* No space in bucket. */ +} + +/** + * Add a key to an existing hash table. This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param ht + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param value + * Value to associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_add_key(struct rte_fbk_hash_table *ht, + uint32_t key, uint16_t value) +{ + return rte_fbk_hash_add_key_with_bucket(ht, + key, value, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Remove a key with a given bucket id from an existing hash table. + * This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @param bucket + * Bucket id associate with key. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_delete_key_with_bucket(struct rte_fbk_hash_table *ht, + uint32_t key, uint32_t bucket) +{ + uint32_t last_entry = ht->entries_per_bucket - 1; + uint32_t i, j; + + for (i = 0; i < ht->entries_per_bucket; i++) { + if (ht->t[bucket + i].entry.key == key) { + /* Find last key in bucket. */ + for (j = ht->entries_per_bucket - 1; j > i; j-- ) { + if (! ht->t[bucket + j].entry.is_entry) { + last_entry = j - 1; + } + } + /* + * Move the last key to the deleted key's position, and + * delete the last key. lastEntry and i may be same but + * it doesn't matter. + */ + ht->t[bucket + i].whole_entry = + ht->t[bucket + last_entry].whole_entry; + ht->t[bucket + last_entry].whole_entry = 0; + + ht->used_entries--; + return 0; + } + } + + return -ENOENT; /* Key didn't exist. */ +} + +/** + * Remove a key from an existing hash table. This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @return + * 0 if ok, or negative value on error. + */ +static inline int +rte_fbk_hash_delete_key(struct rte_fbk_hash_table *ht, uint32_t key) +{ + return rte_fbk_hash_delete_key_with_bucket(ht, + key, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Find a key in the hash table with a given bucketid. + * This operation is multi-thread safe. + * + * @param ht + * Hash table to look in. + * @param key + * Key to find. + * @param bucket + * Bucket associate to the key. + * @return + * The value that was associated with the key, or negative value on error. + */ +static inline int +rte_fbk_hash_lookup_with_bucket(const struct rte_fbk_hash_table *ht, + uint32_t key, uint32_t bucket) +{ + union rte_fbk_hash_entry current_entry; + uint32_t i; + + for (i = 0; i < ht->entries_per_bucket; i++) { + /* Single read of entry, which should be atomic. */ + current_entry.whole_entry = ht->t[bucket + i].whole_entry; + if (! current_entry.entry.is_entry) { + return -ENOENT; /* Error once we hit an empty field. */ + } + if (current_entry.entry.key == key) { + return current_entry.entry.value; + } + } + return -ENOENT; /* Key didn't exist. */ +} + +/** + * Find a key in the hash table. This operation is multi-thread safe. + * + * @param ht + * Hash table to look in. + * @param key + * Key to find. + * @return + * The value that was associated with the key, or negative value on error. + */ +static inline int +rte_fbk_hash_lookup(const struct rte_fbk_hash_table *ht, uint32_t key) +{ + return rte_fbk_hash_lookup_with_bucket(ht, + key, rte_fbk_hash_get_bucket(ht, key)); +} + +/** + * Delete all entries in a hash table. This operation is not multi-thread + * safe and should only be called from one thread. + * + * @param ht + * Hash table to delete entries in. + */ +static inline void +rte_fbk_hash_clear_all(struct rte_fbk_hash_table *ht) +{ + memset(ht->t, 0, sizeof(ht->t[0]) * ht->entries); + ht->used_entries = 0; +} + +/** + * Find what fraction of entries are being used. + * + * @param ht + * Hash table to find how many entries are being used in. + * @return + * Load factor of the hash table, or negative value on error. + */ +static inline double +rte_fbk_hash_get_load_factor(struct rte_fbk_hash_table *ht) +{ + return (double)ht->used_entries / (double)ht->entries; +} + +/** + * Performs a lookup for an existing hash table, and returns a pointer to + * the table if found. + * + * @param name + * Name of the hash table to find + * + * @return + * pointer to hash table structure or NULL on error with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_fbk_hash_table *rte_fbk_hash_find_existing(const char *name); + +/** + * Create a new hash table for use with four byte keys. + * + * @param params + * Parameters used in creation of hash table. + * + * @return + * Pointer to hash table structure that is used in future hash table + * operations, or NULL on error with rte_errno set appropriately. + * Possible rte_errno error values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - invalid parameter value passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_fbk_hash_table * \ +rte_fbk_hash_create(const struct rte_fbk_hash_params *params); + +/** + * Free all memory used by a hash table. + * Has no effect on hash tables allocated in memory zones + * + * @param ht + * Hash table to deallocate. + */ +void rte_fbk_hash_free(struct rte_fbk_hash_table *ht); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_FBK_HASH_H_ */ diff --git a/lib/librte_hash/rte_hash.h b/lib/librte_hash/rte_hash.h new file mode 100644 index 00000000..ae00b658 --- /dev/null +++ b/lib/librte_hash/rte_hash.h @@ -0,0 +1,436 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HASH_H_ +#define _RTE_HASH_H_ + +/** + * @file + * + * RTE Hash Table + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** Maximum size of hash table that can be created. */ +#define RTE_HASH_ENTRIES_MAX (1 << 30) + +/** Maximum number of characters in hash name.*/ +#define RTE_HASH_NAMESIZE 32 + +/** Maximum number of keys that can be searched for using rte_hash_lookup_bulk. */ +#define RTE_HASH_LOOKUP_BULK_MAX 64 +#define RTE_HASH_LOOKUP_MULTI_MAX RTE_HASH_LOOKUP_BULK_MAX + +/** Enable Hardware transactional memory support. */ +#define RTE_HASH_EXTRA_FLAGS_TRANS_MEM_SUPPORT 0x01 + +/** Signature of key that is stored internally. */ +typedef uint32_t hash_sig_t; + +/** Type of function that can be used for calculating the hash value. */ +typedef uint32_t (*rte_hash_function)(const void *key, uint32_t key_len, + uint32_t init_val); + +/** Type of function used to compare the hash key. */ +typedef int (*rte_hash_cmp_eq_t)(const void *key1, const void *key2, size_t key_len); + +/** + * Parameters used when creating the hash table. + */ +struct rte_hash_parameters { + const char *name; /**< Name of the hash. */ + uint32_t entries; /**< Total hash table entries. */ + uint32_t reserved; /**< Unused field. Should be set to 0 */ + uint32_t key_len; /**< Length of hash key. */ + rte_hash_function hash_func; /**< Primary Hash function used to calculate hash. */ + uint32_t hash_func_init_val; /**< Init value used by hash_func. */ + int socket_id; /**< NUMA Socket ID for memory. */ + uint8_t extra_flag; /**< Indicate if additional parameters are present. */ +}; + +/** @internal A hash table structure. */ +struct rte_hash; + +/** + * Create a new hash table. + * + * @param params + * Parameters used to create and initialise the hash table. + * @return + * Pointer to hash table structure that is used in future hash table + * operations, or NULL on error, with error code set in rte_errno. + * Possible rte_errno errors include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOENT - missing entry + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_hash * +rte_hash_create(const struct rte_hash_parameters *params); + +/** + * Set a new hash compare function other than the default one. + * + * @note Function pointer does not work with multi-process, so do not use it + * in multi-process mode. + * + * @param h + * Hash table for which the function is to be changed + * @param func + * New compare function + */ +void rte_hash_set_cmp_func(struct rte_hash *h, rte_hash_cmp_eq_t func); + +/** + * Find an existing hash table object and return a pointer to it. + * + * @param name + * Name of the hash table as passed to rte_hash_create() + * @return + * Pointer to hash table or NULL if object not found + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - value not available for return + */ +struct rte_hash * +rte_hash_find_existing(const char *name); + +/** + * De-allocate all memory used by hash table. + * @param h + * Hash table to free + */ +void +rte_hash_free(struct rte_hash *h); + +/** + * Reset all hash structure, by zeroing all entries + * @param h + * Hash table to reset + */ +void +rte_hash_reset(struct rte_hash *h); + +/** + * Add a key-value pair to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param data + * Data to add to the hash table. + * @return + * - 0 if added successfully + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + */ +int +rte_hash_add_key_data(const struct rte_hash *h, const void *key, void *data); + +/** + * Add a key-value pair with a pre-computed hash value + * to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param sig + * Precomputed hash value for 'key' + * @param data + * Data to add to the hash table. + * @return + * - 0 if added successfully + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + */ +int32_t +rte_hash_add_key_with_hash_data(const struct rte_hash *h, const void *key, + hash_sig_t sig, void *data); + +/** + * Add a key to an existing hash table. This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key. + */ +int32_t +rte_hash_add_key(const struct rte_hash *h, const void *key); + +/** + * Add a key to an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to add the key to. + * @param key + * Key to add to the hash table. + * @param sig + * Precomputed hash value for 'key'. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOSPC if there is no space in the hash for this key. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key. + */ +int32_t +rte_hash_add_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig); + +/** + * Remove a key from an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_del_key(const struct rte_hash *h, const void *key); + +/** + * Remove a key from an existing hash table. + * This operation is not multi-thread safe + * and should only be called from one thread. + * + * @param h + * Hash table to remove the key from. + * @param key + * Key to remove from the hash table. + * @param sig + * Precomputed hash value for 'key'. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_del_key_with_hash(const struct rte_hash *h, const void *key, hash_sig_t sig); + + +/** + * Find a key-value pair in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param data + * Output with pointer to data returned from the hash table. + * @return + * 0 if successful lookup + * - EINVAL if the parameters are invalid. + * - ENOENT if the key is not found. + */ +int +rte_hash_lookup_data(const struct rte_hash *h, const void *key, void **data); + +/** + * Find a key-value pair with a pre-computed hash value + * to an existing hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param sig + * Precomputed hash value for 'key' + * @param data + * Output with pointer to data returned from the hash table. + * @return + * 0 if successful lookup + * - EINVAL if the parameters are invalid. + * - ENOENT if the key is not found. + */ +int +rte_hash_lookup_with_hash_data(const struct rte_hash *h, const void *key, + hash_sig_t sig, void **data); + +/** + * Find a key in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_lookup(const struct rte_hash *h, const void *key); + +/** + * Find a key in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @param sig + * Hash value to remove from the hash table. + * @return + * - -EINVAL if the parameters are invalid. + * - -ENOENT if the key is not found. + * - A positive value that can be used by the caller as an offset into an + * array of user data. This value is unique for this key, and is the same + * value that was returned when the key was added. + */ +int32_t +rte_hash_lookup_with_hash(const struct rte_hash *h, + const void *key, hash_sig_t sig); + +/** + * Calc a hash value by key. + * This operation is not multi-thread safe. + * + * @param h + * Hash table to look in. + * @param key + * Key to find. + * @return + * - hash value + */ +hash_sig_t +rte_hash_hash(const struct rte_hash *h, const void *key); + +#define rte_hash_lookup_multi rte_hash_lookup_bulk +#define rte_hash_lookup_multi_data rte_hash_lookup_bulk_data +/** + * Find multiple keys in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param keys + * A pointer to a list of keys to look for. + * @param num_keys + * How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX). + * @param hit_mask + * Output containing a bitmask with all successful lookups. + * @param data + * Output containing array of data returned from all the successful lookups. + * @return + * -EINVAL if there's an error, otherwise number of successful lookups. + */ +int +rte_hash_lookup_bulk_data(const struct rte_hash *h, const void **keys, + uint32_t num_keys, uint64_t *hit_mask, void *data[]); + +/** + * Find multiple keys in the hash table. + * This operation is multi-thread safe. + * + * @param h + * Hash table to look in. + * @param keys + * A pointer to a list of keys to look for. + * @param num_keys + * How many keys are in the keys list (less than RTE_HASH_LOOKUP_BULK_MAX). + * @param positions + * Output containing a list of values, corresponding to the list of keys that + * can be used by the caller as an offset into an array of user data. These + * values are unique for each key, and are the same values that were returned + * when each key was added. If a key in the list was not found, then -ENOENT + * will be the value. + * @return + * -EINVAL if there's an error, otherwise 0. + */ +int +rte_hash_lookup_bulk(const struct rte_hash *h, const void **keys, + uint32_t num_keys, int32_t *positions); + +/** + * Iterate through the hash table, returning key-value pairs. + * + * @param h + * Hash table to iterate + * @param key + * Output containing the key where current iterator + * was pointing at + * @param data + * Output containing the data associated with key. + * Returns NULL if data was not stored. + * @param next + * Pointer to iterator. Should be 0 to start iterating the hash table. + * Iterator is incremented after each call of this function. + * @return + * Position where key was stored, if successful. + * - -EINVAL if the parameters are invalid. + * - -ENOENT if end of the hash table. + */ +int32_t +rte_hash_iterate(const struct rte_hash *h, const void **key, void **data, uint32_t *next); +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HASH_H_ */ diff --git a/lib/librte_hash/rte_hash_crc.h b/lib/librte_hash/rte_hash_crc.h new file mode 100644 index 00000000..63e74aa4 --- /dev/null +++ b/lib/librte_hash/rte_hash_crc.h @@ -0,0 +1,639 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HASH_CRC_H_ +#define _RTE_HASH_CRC_H_ + +/** + * @file + * + * RTE CRC Hash + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include + +/* Lookup tables for software implementation of CRC32C */ +static const uint32_t crc32c_tables[8][256] = {{ + 0x00000000, 0xF26B8303, 0xE13B70F7, 0x1350F3F4, 0xC79A971F, 0x35F1141C, 0x26A1E7E8, 0xD4CA64EB, + 0x8AD958CF, 0x78B2DBCC, 0x6BE22838, 0x9989AB3B, 0x4D43CFD0, 0xBF284CD3, 0xAC78BF27, 0x5E133C24, + 0x105EC76F, 0xE235446C, 0xF165B798, 0x030E349B, 0xD7C45070, 0x25AFD373, 0x36FF2087, 0xC494A384, + 0x9A879FA0, 0x68EC1CA3, 0x7BBCEF57, 0x89D76C54, 0x5D1D08BF, 0xAF768BBC, 0xBC267848, 0x4E4DFB4B, + 0x20BD8EDE, 0xD2D60DDD, 0xC186FE29, 0x33ED7D2A, 0xE72719C1, 0x154C9AC2, 0x061C6936, 0xF477EA35, + 0xAA64D611, 0x580F5512, 0x4B5FA6E6, 0xB93425E5, 0x6DFE410E, 0x9F95C20D, 0x8CC531F9, 0x7EAEB2FA, + 0x30E349B1, 0xC288CAB2, 0xD1D83946, 0x23B3BA45, 0xF779DEAE, 0x05125DAD, 0x1642AE59, 0xE4292D5A, + 0xBA3A117E, 0x4851927D, 0x5B016189, 0xA96AE28A, 0x7DA08661, 0x8FCB0562, 0x9C9BF696, 0x6EF07595, + 0x417B1DBC, 0xB3109EBF, 0xA0406D4B, 0x522BEE48, 0x86E18AA3, 0x748A09A0, 0x67DAFA54, 0x95B17957, + 0xCBA24573, 0x39C9C670, 0x2A993584, 0xD8F2B687, 0x0C38D26C, 0xFE53516F, 0xED03A29B, 0x1F682198, + 0x5125DAD3, 0xA34E59D0, 0xB01EAA24, 0x42752927, 0x96BF4DCC, 0x64D4CECF, 0x77843D3B, 0x85EFBE38, + 0xDBFC821C, 0x2997011F, 0x3AC7F2EB, 0xC8AC71E8, 0x1C661503, 0xEE0D9600, 0xFD5D65F4, 0x0F36E6F7, + 0x61C69362, 0x93AD1061, 0x80FDE395, 0x72966096, 0xA65C047D, 0x5437877E, 0x4767748A, 0xB50CF789, + 0xEB1FCBAD, 0x197448AE, 0x0A24BB5A, 0xF84F3859, 0x2C855CB2, 0xDEEEDFB1, 0xCDBE2C45, 0x3FD5AF46, + 0x7198540D, 0x83F3D70E, 0x90A324FA, 0x62C8A7F9, 0xB602C312, 0x44694011, 0x5739B3E5, 0xA55230E6, + 0xFB410CC2, 0x092A8FC1, 0x1A7A7C35, 0xE811FF36, 0x3CDB9BDD, 0xCEB018DE, 0xDDE0EB2A, 0x2F8B6829, + 0x82F63B78, 0x709DB87B, 0x63CD4B8F, 0x91A6C88C, 0x456CAC67, 0xB7072F64, 0xA457DC90, 0x563C5F93, + 0x082F63B7, 0xFA44E0B4, 0xE9141340, 0x1B7F9043, 0xCFB5F4A8, 0x3DDE77AB, 0x2E8E845F, 0xDCE5075C, + 0x92A8FC17, 0x60C37F14, 0x73938CE0, 0x81F80FE3, 0x55326B08, 0xA759E80B, 0xB4091BFF, 0x466298FC, + 0x1871A4D8, 0xEA1A27DB, 0xF94AD42F, 0x0B21572C, 0xDFEB33C7, 0x2D80B0C4, 0x3ED04330, 0xCCBBC033, + 0xA24BB5A6, 0x502036A5, 0x4370C551, 0xB11B4652, 0x65D122B9, 0x97BAA1BA, 0x84EA524E, 0x7681D14D, + 0x2892ED69, 0xDAF96E6A, 0xC9A99D9E, 0x3BC21E9D, 0xEF087A76, 0x1D63F975, 0x0E330A81, 0xFC588982, + 0xB21572C9, 0x407EF1CA, 0x532E023E, 0xA145813D, 0x758FE5D6, 0x87E466D5, 0x94B49521, 0x66DF1622, + 0x38CC2A06, 0xCAA7A905, 0xD9F75AF1, 0x2B9CD9F2, 0xFF56BD19, 0x0D3D3E1A, 0x1E6DCDEE, 0xEC064EED, + 0xC38D26C4, 0x31E6A5C7, 0x22B65633, 0xD0DDD530, 0x0417B1DB, 0xF67C32D8, 0xE52CC12C, 0x1747422F, + 0x49547E0B, 0xBB3FFD08, 0xA86F0EFC, 0x5A048DFF, 0x8ECEE914, 0x7CA56A17, 0x6FF599E3, 0x9D9E1AE0, + 0xD3D3E1AB, 0x21B862A8, 0x32E8915C, 0xC083125F, 0x144976B4, 0xE622F5B7, 0xF5720643, 0x07198540, + 0x590AB964, 0xAB613A67, 0xB831C993, 0x4A5A4A90, 0x9E902E7B, 0x6CFBAD78, 0x7FAB5E8C, 0x8DC0DD8F, + 0xE330A81A, 0x115B2B19, 0x020BD8ED, 0xF0605BEE, 0x24AA3F05, 0xD6C1BC06, 0xC5914FF2, 0x37FACCF1, + 0x69E9F0D5, 0x9B8273D6, 0x88D28022, 0x7AB90321, 0xAE7367CA, 0x5C18E4C9, 0x4F48173D, 0xBD23943E, + 0xF36E6F75, 0x0105EC76, 0x12551F82, 0xE03E9C81, 0x34F4F86A, 0xC69F7B69, 0xD5CF889D, 0x27A40B9E, + 0x79B737BA, 0x8BDCB4B9, 0x988C474D, 0x6AE7C44E, 0xBE2DA0A5, 0x4C4623A6, 0x5F16D052, 0xAD7D5351 +}, +{ + 0x00000000, 0x13A29877, 0x274530EE, 0x34E7A899, 0x4E8A61DC, 0x5D28F9AB, 0x69CF5132, 0x7A6DC945, + 0x9D14C3B8, 0x8EB65BCF, 0xBA51F356, 0xA9F36B21, 0xD39EA264, 0xC03C3A13, 0xF4DB928A, 0xE7790AFD, + 0x3FC5F181, 0x2C6769F6, 0x1880C16F, 0x0B225918, 0x714F905D, 0x62ED082A, 0x560AA0B3, 0x45A838C4, + 0xA2D13239, 0xB173AA4E, 0x859402D7, 0x96369AA0, 0xEC5B53E5, 0xFFF9CB92, 0xCB1E630B, 0xD8BCFB7C, + 0x7F8BE302, 0x6C297B75, 0x58CED3EC, 0x4B6C4B9B, 0x310182DE, 0x22A31AA9, 0x1644B230, 0x05E62A47, + 0xE29F20BA, 0xF13DB8CD, 0xC5DA1054, 0xD6788823, 0xAC154166, 0xBFB7D911, 0x8B507188, 0x98F2E9FF, + 0x404E1283, 0x53EC8AF4, 0x670B226D, 0x74A9BA1A, 0x0EC4735F, 0x1D66EB28, 0x298143B1, 0x3A23DBC6, + 0xDD5AD13B, 0xCEF8494C, 0xFA1FE1D5, 0xE9BD79A2, 0x93D0B0E7, 0x80722890, 0xB4958009, 0xA737187E, + 0xFF17C604, 0xECB55E73, 0xD852F6EA, 0xCBF06E9D, 0xB19DA7D8, 0xA23F3FAF, 0x96D89736, 0x857A0F41, + 0x620305BC, 0x71A19DCB, 0x45463552, 0x56E4AD25, 0x2C896460, 0x3F2BFC17, 0x0BCC548E, 0x186ECCF9, + 0xC0D23785, 0xD370AFF2, 0xE797076B, 0xF4359F1C, 0x8E585659, 0x9DFACE2E, 0xA91D66B7, 0xBABFFEC0, + 0x5DC6F43D, 0x4E646C4A, 0x7A83C4D3, 0x69215CA4, 0x134C95E1, 0x00EE0D96, 0x3409A50F, 0x27AB3D78, + 0x809C2506, 0x933EBD71, 0xA7D915E8, 0xB47B8D9F, 0xCE1644DA, 0xDDB4DCAD, 0xE9537434, 0xFAF1EC43, + 0x1D88E6BE, 0x0E2A7EC9, 0x3ACDD650, 0x296F4E27, 0x53028762, 0x40A01F15, 0x7447B78C, 0x67E52FFB, + 0xBF59D487, 0xACFB4CF0, 0x981CE469, 0x8BBE7C1E, 0xF1D3B55B, 0xE2712D2C, 0xD69685B5, 0xC5341DC2, + 0x224D173F, 0x31EF8F48, 0x050827D1, 0x16AABFA6, 0x6CC776E3, 0x7F65EE94, 0x4B82460D, 0x5820DE7A, + 0xFBC3FAF9, 0xE861628E, 0xDC86CA17, 0xCF245260, 0xB5499B25, 0xA6EB0352, 0x920CABCB, 0x81AE33BC, + 0x66D73941, 0x7575A136, 0x419209AF, 0x523091D8, 0x285D589D, 0x3BFFC0EA, 0x0F186873, 0x1CBAF004, + 0xC4060B78, 0xD7A4930F, 0xE3433B96, 0xF0E1A3E1, 0x8A8C6AA4, 0x992EF2D3, 0xADC95A4A, 0xBE6BC23D, + 0x5912C8C0, 0x4AB050B7, 0x7E57F82E, 0x6DF56059, 0x1798A91C, 0x043A316B, 0x30DD99F2, 0x237F0185, + 0x844819FB, 0x97EA818C, 0xA30D2915, 0xB0AFB162, 0xCAC27827, 0xD960E050, 0xED8748C9, 0xFE25D0BE, + 0x195CDA43, 0x0AFE4234, 0x3E19EAAD, 0x2DBB72DA, 0x57D6BB9F, 0x447423E8, 0x70938B71, 0x63311306, + 0xBB8DE87A, 0xA82F700D, 0x9CC8D894, 0x8F6A40E3, 0xF50789A6, 0xE6A511D1, 0xD242B948, 0xC1E0213F, + 0x26992BC2, 0x353BB3B5, 0x01DC1B2C, 0x127E835B, 0x68134A1E, 0x7BB1D269, 0x4F567AF0, 0x5CF4E287, + 0x04D43CFD, 0x1776A48A, 0x23910C13, 0x30339464, 0x4A5E5D21, 0x59FCC556, 0x6D1B6DCF, 0x7EB9F5B8, + 0x99C0FF45, 0x8A626732, 0xBE85CFAB, 0xAD2757DC, 0xD74A9E99, 0xC4E806EE, 0xF00FAE77, 0xE3AD3600, + 0x3B11CD7C, 0x28B3550B, 0x1C54FD92, 0x0FF665E5, 0x759BACA0, 0x663934D7, 0x52DE9C4E, 0x417C0439, + 0xA6050EC4, 0xB5A796B3, 0x81403E2A, 0x92E2A65D, 0xE88F6F18, 0xFB2DF76F, 0xCFCA5FF6, 0xDC68C781, + 0x7B5FDFFF, 0x68FD4788, 0x5C1AEF11, 0x4FB87766, 0x35D5BE23, 0x26772654, 0x12908ECD, 0x013216BA, + 0xE64B1C47, 0xF5E98430, 0xC10E2CA9, 0xD2ACB4DE, 0xA8C17D9B, 0xBB63E5EC, 0x8F844D75, 0x9C26D502, + 0x449A2E7E, 0x5738B609, 0x63DF1E90, 0x707D86E7, 0x0A104FA2, 0x19B2D7D5, 0x2D557F4C, 0x3EF7E73B, + 0xD98EEDC6, 0xCA2C75B1, 0xFECBDD28, 0xED69455F, 0x97048C1A, 0x84A6146D, 0xB041BCF4, 0xA3E32483 +}, +{ + 0x00000000, 0xA541927E, 0x4F6F520D, 0xEA2EC073, 0x9EDEA41A, 0x3B9F3664, 0xD1B1F617, 0x74F06469, + 0x38513EC5, 0x9D10ACBB, 0x773E6CC8, 0xD27FFEB6, 0xA68F9ADF, 0x03CE08A1, 0xE9E0C8D2, 0x4CA15AAC, + 0x70A27D8A, 0xD5E3EFF4, 0x3FCD2F87, 0x9A8CBDF9, 0xEE7CD990, 0x4B3D4BEE, 0xA1138B9D, 0x045219E3, + 0x48F3434F, 0xEDB2D131, 0x079C1142, 0xA2DD833C, 0xD62DE755, 0x736C752B, 0x9942B558, 0x3C032726, + 0xE144FB14, 0x4405696A, 0xAE2BA919, 0x0B6A3B67, 0x7F9A5F0E, 0xDADBCD70, 0x30F50D03, 0x95B49F7D, + 0xD915C5D1, 0x7C5457AF, 0x967A97DC, 0x333B05A2, 0x47CB61CB, 0xE28AF3B5, 0x08A433C6, 0xADE5A1B8, + 0x91E6869E, 0x34A714E0, 0xDE89D493, 0x7BC846ED, 0x0F382284, 0xAA79B0FA, 0x40577089, 0xE516E2F7, + 0xA9B7B85B, 0x0CF62A25, 0xE6D8EA56, 0x43997828, 0x37691C41, 0x92288E3F, 0x78064E4C, 0xDD47DC32, + 0xC76580D9, 0x622412A7, 0x880AD2D4, 0x2D4B40AA, 0x59BB24C3, 0xFCFAB6BD, 0x16D476CE, 0xB395E4B0, + 0xFF34BE1C, 0x5A752C62, 0xB05BEC11, 0x151A7E6F, 0x61EA1A06, 0xC4AB8878, 0x2E85480B, 0x8BC4DA75, + 0xB7C7FD53, 0x12866F2D, 0xF8A8AF5E, 0x5DE93D20, 0x29195949, 0x8C58CB37, 0x66760B44, 0xC337993A, + 0x8F96C396, 0x2AD751E8, 0xC0F9919B, 0x65B803E5, 0x1148678C, 0xB409F5F2, 0x5E273581, 0xFB66A7FF, + 0x26217BCD, 0x8360E9B3, 0x694E29C0, 0xCC0FBBBE, 0xB8FFDFD7, 0x1DBE4DA9, 0xF7908DDA, 0x52D11FA4, + 0x1E704508, 0xBB31D776, 0x511F1705, 0xF45E857B, 0x80AEE112, 0x25EF736C, 0xCFC1B31F, 0x6A802161, + 0x56830647, 0xF3C29439, 0x19EC544A, 0xBCADC634, 0xC85DA25D, 0x6D1C3023, 0x8732F050, 0x2273622E, + 0x6ED23882, 0xCB93AAFC, 0x21BD6A8F, 0x84FCF8F1, 0xF00C9C98, 0x554D0EE6, 0xBF63CE95, 0x1A225CEB, + 0x8B277743, 0x2E66E53D, 0xC448254E, 0x6109B730, 0x15F9D359, 0xB0B84127, 0x5A968154, 0xFFD7132A, + 0xB3764986, 0x1637DBF8, 0xFC191B8B, 0x595889F5, 0x2DA8ED9C, 0x88E97FE2, 0x62C7BF91, 0xC7862DEF, + 0xFB850AC9, 0x5EC498B7, 0xB4EA58C4, 0x11ABCABA, 0x655BAED3, 0xC01A3CAD, 0x2A34FCDE, 0x8F756EA0, + 0xC3D4340C, 0x6695A672, 0x8CBB6601, 0x29FAF47F, 0x5D0A9016, 0xF84B0268, 0x1265C21B, 0xB7245065, + 0x6A638C57, 0xCF221E29, 0x250CDE5A, 0x804D4C24, 0xF4BD284D, 0x51FCBA33, 0xBBD27A40, 0x1E93E83E, + 0x5232B292, 0xF77320EC, 0x1D5DE09F, 0xB81C72E1, 0xCCEC1688, 0x69AD84F6, 0x83834485, 0x26C2D6FB, + 0x1AC1F1DD, 0xBF8063A3, 0x55AEA3D0, 0xF0EF31AE, 0x841F55C7, 0x215EC7B9, 0xCB7007CA, 0x6E3195B4, + 0x2290CF18, 0x87D15D66, 0x6DFF9D15, 0xC8BE0F6B, 0xBC4E6B02, 0x190FF97C, 0xF321390F, 0x5660AB71, + 0x4C42F79A, 0xE90365E4, 0x032DA597, 0xA66C37E9, 0xD29C5380, 0x77DDC1FE, 0x9DF3018D, 0x38B293F3, + 0x7413C95F, 0xD1525B21, 0x3B7C9B52, 0x9E3D092C, 0xEACD6D45, 0x4F8CFF3B, 0xA5A23F48, 0x00E3AD36, + 0x3CE08A10, 0x99A1186E, 0x738FD81D, 0xD6CE4A63, 0xA23E2E0A, 0x077FBC74, 0xED517C07, 0x4810EE79, + 0x04B1B4D5, 0xA1F026AB, 0x4BDEE6D8, 0xEE9F74A6, 0x9A6F10CF, 0x3F2E82B1, 0xD50042C2, 0x7041D0BC, + 0xAD060C8E, 0x08479EF0, 0xE2695E83, 0x4728CCFD, 0x33D8A894, 0x96993AEA, 0x7CB7FA99, 0xD9F668E7, + 0x9557324B, 0x3016A035, 0xDA386046, 0x7F79F238, 0x0B899651, 0xAEC8042F, 0x44E6C45C, 0xE1A75622, + 0xDDA47104, 0x78E5E37A, 0x92CB2309, 0x378AB177, 0x437AD51E, 0xE63B4760, 0x0C158713, 0xA954156D, + 0xE5F54FC1, 0x40B4DDBF, 0xAA9A1DCC, 0x0FDB8FB2, 0x7B2BEBDB, 0xDE6A79A5, 0x3444B9D6, 0x91052BA8 +}, +{ + 0x00000000, 0xDD45AAB8, 0xBF672381, 0x62228939, 0x7B2231F3, 0xA6679B4B, 0xC4451272, 0x1900B8CA, + 0xF64463E6, 0x2B01C95E, 0x49234067, 0x9466EADF, 0x8D665215, 0x5023F8AD, 0x32017194, 0xEF44DB2C, + 0xE964B13D, 0x34211B85, 0x560392BC, 0x8B463804, 0x924680CE, 0x4F032A76, 0x2D21A34F, 0xF06409F7, + 0x1F20D2DB, 0xC2657863, 0xA047F15A, 0x7D025BE2, 0x6402E328, 0xB9474990, 0xDB65C0A9, 0x06206A11, + 0xD725148B, 0x0A60BE33, 0x6842370A, 0xB5079DB2, 0xAC072578, 0x71428FC0, 0x136006F9, 0xCE25AC41, + 0x2161776D, 0xFC24DDD5, 0x9E0654EC, 0x4343FE54, 0x5A43469E, 0x8706EC26, 0xE524651F, 0x3861CFA7, + 0x3E41A5B6, 0xE3040F0E, 0x81268637, 0x5C632C8F, 0x45639445, 0x98263EFD, 0xFA04B7C4, 0x27411D7C, + 0xC805C650, 0x15406CE8, 0x7762E5D1, 0xAA274F69, 0xB327F7A3, 0x6E625D1B, 0x0C40D422, 0xD1057E9A, + 0xABA65FE7, 0x76E3F55F, 0x14C17C66, 0xC984D6DE, 0xD0846E14, 0x0DC1C4AC, 0x6FE34D95, 0xB2A6E72D, + 0x5DE23C01, 0x80A796B9, 0xE2851F80, 0x3FC0B538, 0x26C00DF2, 0xFB85A74A, 0x99A72E73, 0x44E284CB, + 0x42C2EEDA, 0x9F874462, 0xFDA5CD5B, 0x20E067E3, 0x39E0DF29, 0xE4A57591, 0x8687FCA8, 0x5BC25610, + 0xB4868D3C, 0x69C32784, 0x0BE1AEBD, 0xD6A40405, 0xCFA4BCCF, 0x12E11677, 0x70C39F4E, 0xAD8635F6, + 0x7C834B6C, 0xA1C6E1D4, 0xC3E468ED, 0x1EA1C255, 0x07A17A9F, 0xDAE4D027, 0xB8C6591E, 0x6583F3A6, + 0x8AC7288A, 0x57828232, 0x35A00B0B, 0xE8E5A1B3, 0xF1E51979, 0x2CA0B3C1, 0x4E823AF8, 0x93C79040, + 0x95E7FA51, 0x48A250E9, 0x2A80D9D0, 0xF7C57368, 0xEEC5CBA2, 0x3380611A, 0x51A2E823, 0x8CE7429B, + 0x63A399B7, 0xBEE6330F, 0xDCC4BA36, 0x0181108E, 0x1881A844, 0xC5C402FC, 0xA7E68BC5, 0x7AA3217D, + 0x52A0C93F, 0x8FE56387, 0xEDC7EABE, 0x30824006, 0x2982F8CC, 0xF4C75274, 0x96E5DB4D, 0x4BA071F5, + 0xA4E4AAD9, 0x79A10061, 0x1B838958, 0xC6C623E0, 0xDFC69B2A, 0x02833192, 0x60A1B8AB, 0xBDE41213, + 0xBBC47802, 0x6681D2BA, 0x04A35B83, 0xD9E6F13B, 0xC0E649F1, 0x1DA3E349, 0x7F816A70, 0xA2C4C0C8, + 0x4D801BE4, 0x90C5B15C, 0xF2E73865, 0x2FA292DD, 0x36A22A17, 0xEBE780AF, 0x89C50996, 0x5480A32E, + 0x8585DDB4, 0x58C0770C, 0x3AE2FE35, 0xE7A7548D, 0xFEA7EC47, 0x23E246FF, 0x41C0CFC6, 0x9C85657E, + 0x73C1BE52, 0xAE8414EA, 0xCCA69DD3, 0x11E3376B, 0x08E38FA1, 0xD5A62519, 0xB784AC20, 0x6AC10698, + 0x6CE16C89, 0xB1A4C631, 0xD3864F08, 0x0EC3E5B0, 0x17C35D7A, 0xCA86F7C2, 0xA8A47EFB, 0x75E1D443, + 0x9AA50F6F, 0x47E0A5D7, 0x25C22CEE, 0xF8878656, 0xE1873E9C, 0x3CC29424, 0x5EE01D1D, 0x83A5B7A5, + 0xF90696D8, 0x24433C60, 0x4661B559, 0x9B241FE1, 0x8224A72B, 0x5F610D93, 0x3D4384AA, 0xE0062E12, + 0x0F42F53E, 0xD2075F86, 0xB025D6BF, 0x6D607C07, 0x7460C4CD, 0xA9256E75, 0xCB07E74C, 0x16424DF4, + 0x106227E5, 0xCD278D5D, 0xAF050464, 0x7240AEDC, 0x6B401616, 0xB605BCAE, 0xD4273597, 0x09629F2F, + 0xE6264403, 0x3B63EEBB, 0x59416782, 0x8404CD3A, 0x9D0475F0, 0x4041DF48, 0x22635671, 0xFF26FCC9, + 0x2E238253, 0xF36628EB, 0x9144A1D2, 0x4C010B6A, 0x5501B3A0, 0x88441918, 0xEA669021, 0x37233A99, + 0xD867E1B5, 0x05224B0D, 0x6700C234, 0xBA45688C, 0xA345D046, 0x7E007AFE, 0x1C22F3C7, 0xC167597F, + 0xC747336E, 0x1A0299D6, 0x782010EF, 0xA565BA57, 0xBC65029D, 0x6120A825, 0x0302211C, 0xDE478BA4, + 0x31035088, 0xEC46FA30, 0x8E647309, 0x5321D9B1, 0x4A21617B, 0x9764CBC3, 0xF54642FA, 0x2803E842 +}, +{ + 0x00000000, 0x38116FAC, 0x7022DF58, 0x4833B0F4, 0xE045BEB0, 0xD854D11C, 0x906761E8, 0xA8760E44, + 0xC5670B91, 0xFD76643D, 0xB545D4C9, 0x8D54BB65, 0x2522B521, 0x1D33DA8D, 0x55006A79, 0x6D1105D5, + 0x8F2261D3, 0xB7330E7F, 0xFF00BE8B, 0xC711D127, 0x6F67DF63, 0x5776B0CF, 0x1F45003B, 0x27546F97, + 0x4A456A42, 0x725405EE, 0x3A67B51A, 0x0276DAB6, 0xAA00D4F2, 0x9211BB5E, 0xDA220BAA, 0xE2336406, + 0x1BA8B557, 0x23B9DAFB, 0x6B8A6A0F, 0x539B05A3, 0xFBED0BE7, 0xC3FC644B, 0x8BCFD4BF, 0xB3DEBB13, + 0xDECFBEC6, 0xE6DED16A, 0xAEED619E, 0x96FC0E32, 0x3E8A0076, 0x069B6FDA, 0x4EA8DF2E, 0x76B9B082, + 0x948AD484, 0xAC9BBB28, 0xE4A80BDC, 0xDCB96470, 0x74CF6A34, 0x4CDE0598, 0x04EDB56C, 0x3CFCDAC0, + 0x51EDDF15, 0x69FCB0B9, 0x21CF004D, 0x19DE6FE1, 0xB1A861A5, 0x89B90E09, 0xC18ABEFD, 0xF99BD151, + 0x37516AAE, 0x0F400502, 0x4773B5F6, 0x7F62DA5A, 0xD714D41E, 0xEF05BBB2, 0xA7360B46, 0x9F2764EA, + 0xF236613F, 0xCA270E93, 0x8214BE67, 0xBA05D1CB, 0x1273DF8F, 0x2A62B023, 0x625100D7, 0x5A406F7B, + 0xB8730B7D, 0x806264D1, 0xC851D425, 0xF040BB89, 0x5836B5CD, 0x6027DA61, 0x28146A95, 0x10050539, + 0x7D1400EC, 0x45056F40, 0x0D36DFB4, 0x3527B018, 0x9D51BE5C, 0xA540D1F0, 0xED736104, 0xD5620EA8, + 0x2CF9DFF9, 0x14E8B055, 0x5CDB00A1, 0x64CA6F0D, 0xCCBC6149, 0xF4AD0EE5, 0xBC9EBE11, 0x848FD1BD, + 0xE99ED468, 0xD18FBBC4, 0x99BC0B30, 0xA1AD649C, 0x09DB6AD8, 0x31CA0574, 0x79F9B580, 0x41E8DA2C, + 0xA3DBBE2A, 0x9BCAD186, 0xD3F96172, 0xEBE80EDE, 0x439E009A, 0x7B8F6F36, 0x33BCDFC2, 0x0BADB06E, + 0x66BCB5BB, 0x5EADDA17, 0x169E6AE3, 0x2E8F054F, 0x86F90B0B, 0xBEE864A7, 0xF6DBD453, 0xCECABBFF, + 0x6EA2D55C, 0x56B3BAF0, 0x1E800A04, 0x269165A8, 0x8EE76BEC, 0xB6F60440, 0xFEC5B4B4, 0xC6D4DB18, + 0xABC5DECD, 0x93D4B161, 0xDBE70195, 0xE3F66E39, 0x4B80607D, 0x73910FD1, 0x3BA2BF25, 0x03B3D089, + 0xE180B48F, 0xD991DB23, 0x91A26BD7, 0xA9B3047B, 0x01C50A3F, 0x39D46593, 0x71E7D567, 0x49F6BACB, + 0x24E7BF1E, 0x1CF6D0B2, 0x54C56046, 0x6CD40FEA, 0xC4A201AE, 0xFCB36E02, 0xB480DEF6, 0x8C91B15A, + 0x750A600B, 0x4D1B0FA7, 0x0528BF53, 0x3D39D0FF, 0x954FDEBB, 0xAD5EB117, 0xE56D01E3, 0xDD7C6E4F, + 0xB06D6B9A, 0x887C0436, 0xC04FB4C2, 0xF85EDB6E, 0x5028D52A, 0x6839BA86, 0x200A0A72, 0x181B65DE, + 0xFA2801D8, 0xC2396E74, 0x8A0ADE80, 0xB21BB12C, 0x1A6DBF68, 0x227CD0C4, 0x6A4F6030, 0x525E0F9C, + 0x3F4F0A49, 0x075E65E5, 0x4F6DD511, 0x777CBABD, 0xDF0AB4F9, 0xE71BDB55, 0xAF286BA1, 0x9739040D, + 0x59F3BFF2, 0x61E2D05E, 0x29D160AA, 0x11C00F06, 0xB9B60142, 0x81A76EEE, 0xC994DE1A, 0xF185B1B6, + 0x9C94B463, 0xA485DBCF, 0xECB66B3B, 0xD4A70497, 0x7CD10AD3, 0x44C0657F, 0x0CF3D58B, 0x34E2BA27, + 0xD6D1DE21, 0xEEC0B18D, 0xA6F30179, 0x9EE26ED5, 0x36946091, 0x0E850F3D, 0x46B6BFC9, 0x7EA7D065, + 0x13B6D5B0, 0x2BA7BA1C, 0x63940AE8, 0x5B856544, 0xF3F36B00, 0xCBE204AC, 0x83D1B458, 0xBBC0DBF4, + 0x425B0AA5, 0x7A4A6509, 0x3279D5FD, 0x0A68BA51, 0xA21EB415, 0x9A0FDBB9, 0xD23C6B4D, 0xEA2D04E1, + 0x873C0134, 0xBF2D6E98, 0xF71EDE6C, 0xCF0FB1C0, 0x6779BF84, 0x5F68D028, 0x175B60DC, 0x2F4A0F70, + 0xCD796B76, 0xF56804DA, 0xBD5BB42E, 0x854ADB82, 0x2D3CD5C6, 0x152DBA6A, 0x5D1E0A9E, 0x650F6532, + 0x081E60E7, 0x300F0F4B, 0x783CBFBF, 0x402DD013, 0xE85BDE57, 0xD04AB1FB, 0x9879010F, 0xA0686EA3 +}, +{ + 0x00000000, 0xEF306B19, 0xDB8CA0C3, 0x34BCCBDA, 0xB2F53777, 0x5DC55C6E, 0x697997B4, 0x8649FCAD, + 0x6006181F, 0x8F367306, 0xBB8AB8DC, 0x54BAD3C5, 0xD2F32F68, 0x3DC34471, 0x097F8FAB, 0xE64FE4B2, + 0xC00C303E, 0x2F3C5B27, 0x1B8090FD, 0xF4B0FBE4, 0x72F90749, 0x9DC96C50, 0xA975A78A, 0x4645CC93, + 0xA00A2821, 0x4F3A4338, 0x7B8688E2, 0x94B6E3FB, 0x12FF1F56, 0xFDCF744F, 0xC973BF95, 0x2643D48C, + 0x85F4168D, 0x6AC47D94, 0x5E78B64E, 0xB148DD57, 0x370121FA, 0xD8314AE3, 0xEC8D8139, 0x03BDEA20, + 0xE5F20E92, 0x0AC2658B, 0x3E7EAE51, 0xD14EC548, 0x570739E5, 0xB83752FC, 0x8C8B9926, 0x63BBF23F, + 0x45F826B3, 0xAAC84DAA, 0x9E748670, 0x7144ED69, 0xF70D11C4, 0x183D7ADD, 0x2C81B107, 0xC3B1DA1E, + 0x25FE3EAC, 0xCACE55B5, 0xFE729E6F, 0x1142F576, 0x970B09DB, 0x783B62C2, 0x4C87A918, 0xA3B7C201, + 0x0E045BEB, 0xE13430F2, 0xD588FB28, 0x3AB89031, 0xBCF16C9C, 0x53C10785, 0x677DCC5F, 0x884DA746, + 0x6E0243F4, 0x813228ED, 0xB58EE337, 0x5ABE882E, 0xDCF77483, 0x33C71F9A, 0x077BD440, 0xE84BBF59, + 0xCE086BD5, 0x213800CC, 0x1584CB16, 0xFAB4A00F, 0x7CFD5CA2, 0x93CD37BB, 0xA771FC61, 0x48419778, + 0xAE0E73CA, 0x413E18D3, 0x7582D309, 0x9AB2B810, 0x1CFB44BD, 0xF3CB2FA4, 0xC777E47E, 0x28478F67, + 0x8BF04D66, 0x64C0267F, 0x507CEDA5, 0xBF4C86BC, 0x39057A11, 0xD6351108, 0xE289DAD2, 0x0DB9B1CB, + 0xEBF65579, 0x04C63E60, 0x307AF5BA, 0xDF4A9EA3, 0x5903620E, 0xB6330917, 0x828FC2CD, 0x6DBFA9D4, + 0x4BFC7D58, 0xA4CC1641, 0x9070DD9B, 0x7F40B682, 0xF9094A2F, 0x16392136, 0x2285EAEC, 0xCDB581F5, + 0x2BFA6547, 0xC4CA0E5E, 0xF076C584, 0x1F46AE9D, 0x990F5230, 0x763F3929, 0x4283F2F3, 0xADB399EA, + 0x1C08B7D6, 0xF338DCCF, 0xC7841715, 0x28B47C0C, 0xAEFD80A1, 0x41CDEBB8, 0x75712062, 0x9A414B7B, + 0x7C0EAFC9, 0x933EC4D0, 0xA7820F0A, 0x48B26413, 0xCEFB98BE, 0x21CBF3A7, 0x1577387D, 0xFA475364, + 0xDC0487E8, 0x3334ECF1, 0x0788272B, 0xE8B84C32, 0x6EF1B09F, 0x81C1DB86, 0xB57D105C, 0x5A4D7B45, + 0xBC029FF7, 0x5332F4EE, 0x678E3F34, 0x88BE542D, 0x0EF7A880, 0xE1C7C399, 0xD57B0843, 0x3A4B635A, + 0x99FCA15B, 0x76CCCA42, 0x42700198, 0xAD406A81, 0x2B09962C, 0xC439FD35, 0xF08536EF, 0x1FB55DF6, + 0xF9FAB944, 0x16CAD25D, 0x22761987, 0xCD46729E, 0x4B0F8E33, 0xA43FE52A, 0x90832EF0, 0x7FB345E9, + 0x59F09165, 0xB6C0FA7C, 0x827C31A6, 0x6D4C5ABF, 0xEB05A612, 0x0435CD0B, 0x308906D1, 0xDFB96DC8, + 0x39F6897A, 0xD6C6E263, 0xE27A29B9, 0x0D4A42A0, 0x8B03BE0D, 0x6433D514, 0x508F1ECE, 0xBFBF75D7, + 0x120CEC3D, 0xFD3C8724, 0xC9804CFE, 0x26B027E7, 0xA0F9DB4A, 0x4FC9B053, 0x7B757B89, 0x94451090, + 0x720AF422, 0x9D3A9F3B, 0xA98654E1, 0x46B63FF8, 0xC0FFC355, 0x2FCFA84C, 0x1B736396, 0xF443088F, + 0xD200DC03, 0x3D30B71A, 0x098C7CC0, 0xE6BC17D9, 0x60F5EB74, 0x8FC5806D, 0xBB794BB7, 0x544920AE, + 0xB206C41C, 0x5D36AF05, 0x698A64DF, 0x86BA0FC6, 0x00F3F36B, 0xEFC39872, 0xDB7F53A8, 0x344F38B1, + 0x97F8FAB0, 0x78C891A9, 0x4C745A73, 0xA344316A, 0x250DCDC7, 0xCA3DA6DE, 0xFE816D04, 0x11B1061D, + 0xF7FEE2AF, 0x18CE89B6, 0x2C72426C, 0xC3422975, 0x450BD5D8, 0xAA3BBEC1, 0x9E87751B, 0x71B71E02, + 0x57F4CA8E, 0xB8C4A197, 0x8C786A4D, 0x63480154, 0xE501FDF9, 0x0A3196E0, 0x3E8D5D3A, 0xD1BD3623, + 0x37F2D291, 0xD8C2B988, 0xEC7E7252, 0x034E194B, 0x8507E5E6, 0x6A378EFF, 0x5E8B4525, 0xB1BB2E3C +}, +{ + 0x00000000, 0x68032CC8, 0xD0065990, 0xB8057558, 0xA5E0C5D1, 0xCDE3E919, 0x75E69C41, 0x1DE5B089, + 0x4E2DFD53, 0x262ED19B, 0x9E2BA4C3, 0xF628880B, 0xEBCD3882, 0x83CE144A, 0x3BCB6112, 0x53C84DDA, + 0x9C5BFAA6, 0xF458D66E, 0x4C5DA336, 0x245E8FFE, 0x39BB3F77, 0x51B813BF, 0xE9BD66E7, 0x81BE4A2F, + 0xD27607F5, 0xBA752B3D, 0x02705E65, 0x6A7372AD, 0x7796C224, 0x1F95EEEC, 0xA7909BB4, 0xCF93B77C, + 0x3D5B83BD, 0x5558AF75, 0xED5DDA2D, 0x855EF6E5, 0x98BB466C, 0xF0B86AA4, 0x48BD1FFC, 0x20BE3334, + 0x73767EEE, 0x1B755226, 0xA370277E, 0xCB730BB6, 0xD696BB3F, 0xBE9597F7, 0x0690E2AF, 0x6E93CE67, + 0xA100791B, 0xC90355D3, 0x7106208B, 0x19050C43, 0x04E0BCCA, 0x6CE39002, 0xD4E6E55A, 0xBCE5C992, + 0xEF2D8448, 0x872EA880, 0x3F2BDDD8, 0x5728F110, 0x4ACD4199, 0x22CE6D51, 0x9ACB1809, 0xF2C834C1, + 0x7AB7077A, 0x12B42BB2, 0xAAB15EEA, 0xC2B27222, 0xDF57C2AB, 0xB754EE63, 0x0F519B3B, 0x6752B7F3, + 0x349AFA29, 0x5C99D6E1, 0xE49CA3B9, 0x8C9F8F71, 0x917A3FF8, 0xF9791330, 0x417C6668, 0x297F4AA0, + 0xE6ECFDDC, 0x8EEFD114, 0x36EAA44C, 0x5EE98884, 0x430C380D, 0x2B0F14C5, 0x930A619D, 0xFB094D55, + 0xA8C1008F, 0xC0C22C47, 0x78C7591F, 0x10C475D7, 0x0D21C55E, 0x6522E996, 0xDD279CCE, 0xB524B006, + 0x47EC84C7, 0x2FEFA80F, 0x97EADD57, 0xFFE9F19F, 0xE20C4116, 0x8A0F6DDE, 0x320A1886, 0x5A09344E, + 0x09C17994, 0x61C2555C, 0xD9C72004, 0xB1C40CCC, 0xAC21BC45, 0xC422908D, 0x7C27E5D5, 0x1424C91D, + 0xDBB77E61, 0xB3B452A9, 0x0BB127F1, 0x63B20B39, 0x7E57BBB0, 0x16549778, 0xAE51E220, 0xC652CEE8, + 0x959A8332, 0xFD99AFFA, 0x459CDAA2, 0x2D9FF66A, 0x307A46E3, 0x58796A2B, 0xE07C1F73, 0x887F33BB, + 0xF56E0EF4, 0x9D6D223C, 0x25685764, 0x4D6B7BAC, 0x508ECB25, 0x388DE7ED, 0x808892B5, 0xE88BBE7D, + 0xBB43F3A7, 0xD340DF6F, 0x6B45AA37, 0x034686FF, 0x1EA33676, 0x76A01ABE, 0xCEA56FE6, 0xA6A6432E, + 0x6935F452, 0x0136D89A, 0xB933ADC2, 0xD130810A, 0xCCD53183, 0xA4D61D4B, 0x1CD36813, 0x74D044DB, + 0x27180901, 0x4F1B25C9, 0xF71E5091, 0x9F1D7C59, 0x82F8CCD0, 0xEAFBE018, 0x52FE9540, 0x3AFDB988, + 0xC8358D49, 0xA036A181, 0x1833D4D9, 0x7030F811, 0x6DD54898, 0x05D66450, 0xBDD31108, 0xD5D03DC0, + 0x8618701A, 0xEE1B5CD2, 0x561E298A, 0x3E1D0542, 0x23F8B5CB, 0x4BFB9903, 0xF3FEEC5B, 0x9BFDC093, + 0x546E77EF, 0x3C6D5B27, 0x84682E7F, 0xEC6B02B7, 0xF18EB23E, 0x998D9EF6, 0x2188EBAE, 0x498BC766, + 0x1A438ABC, 0x7240A674, 0xCA45D32C, 0xA246FFE4, 0xBFA34F6D, 0xD7A063A5, 0x6FA516FD, 0x07A63A35, + 0x8FD9098E, 0xE7DA2546, 0x5FDF501E, 0x37DC7CD6, 0x2A39CC5F, 0x423AE097, 0xFA3F95CF, 0x923CB907, + 0xC1F4F4DD, 0xA9F7D815, 0x11F2AD4D, 0x79F18185, 0x6414310C, 0x0C171DC4, 0xB412689C, 0xDC114454, + 0x1382F328, 0x7B81DFE0, 0xC384AAB8, 0xAB878670, 0xB66236F9, 0xDE611A31, 0x66646F69, 0x0E6743A1, + 0x5DAF0E7B, 0x35AC22B3, 0x8DA957EB, 0xE5AA7B23, 0xF84FCBAA, 0x904CE762, 0x2849923A, 0x404ABEF2, + 0xB2828A33, 0xDA81A6FB, 0x6284D3A3, 0x0A87FF6B, 0x17624FE2, 0x7F61632A, 0xC7641672, 0xAF673ABA, + 0xFCAF7760, 0x94AC5BA8, 0x2CA92EF0, 0x44AA0238, 0x594FB2B1, 0x314C9E79, 0x8949EB21, 0xE14AC7E9, + 0x2ED97095, 0x46DA5C5D, 0xFEDF2905, 0x96DC05CD, 0x8B39B544, 0xE33A998C, 0x5B3FECD4, 0x333CC01C, + 0x60F48DC6, 0x08F7A10E, 0xB0F2D456, 0xD8F1F89E, 0xC5144817, 0xAD1764DF, 0x15121187, 0x7D113D4F +}, +{ + 0x00000000, 0x493C7D27, 0x9278FA4E, 0xDB448769, 0x211D826D, 0x6821FF4A, 0xB3657823, 0xFA590504, + 0x423B04DA, 0x0B0779FD, 0xD043FE94, 0x997F83B3, 0x632686B7, 0x2A1AFB90, 0xF15E7CF9, 0xB86201DE, + 0x847609B4, 0xCD4A7493, 0x160EF3FA, 0x5F328EDD, 0xA56B8BD9, 0xEC57F6FE, 0x37137197, 0x7E2F0CB0, + 0xC64D0D6E, 0x8F717049, 0x5435F720, 0x1D098A07, 0xE7508F03, 0xAE6CF224, 0x7528754D, 0x3C14086A, + 0x0D006599, 0x443C18BE, 0x9F789FD7, 0xD644E2F0, 0x2C1DE7F4, 0x65219AD3, 0xBE651DBA, 0xF759609D, + 0x4F3B6143, 0x06071C64, 0xDD439B0D, 0x947FE62A, 0x6E26E32E, 0x271A9E09, 0xFC5E1960, 0xB5626447, + 0x89766C2D, 0xC04A110A, 0x1B0E9663, 0x5232EB44, 0xA86BEE40, 0xE1579367, 0x3A13140E, 0x732F6929, + 0xCB4D68F7, 0x827115D0, 0x593592B9, 0x1009EF9E, 0xEA50EA9A, 0xA36C97BD, 0x782810D4, 0x31146DF3, + 0x1A00CB32, 0x533CB615, 0x8878317C, 0xC1444C5B, 0x3B1D495F, 0x72213478, 0xA965B311, 0xE059CE36, + 0x583BCFE8, 0x1107B2CF, 0xCA4335A6, 0x837F4881, 0x79264D85, 0x301A30A2, 0xEB5EB7CB, 0xA262CAEC, + 0x9E76C286, 0xD74ABFA1, 0x0C0E38C8, 0x453245EF, 0xBF6B40EB, 0xF6573DCC, 0x2D13BAA5, 0x642FC782, + 0xDC4DC65C, 0x9571BB7B, 0x4E353C12, 0x07094135, 0xFD504431, 0xB46C3916, 0x6F28BE7F, 0x2614C358, + 0x1700AEAB, 0x5E3CD38C, 0x857854E5, 0xCC4429C2, 0x361D2CC6, 0x7F2151E1, 0xA465D688, 0xED59ABAF, + 0x553BAA71, 0x1C07D756, 0xC743503F, 0x8E7F2D18, 0x7426281C, 0x3D1A553B, 0xE65ED252, 0xAF62AF75, + 0x9376A71F, 0xDA4ADA38, 0x010E5D51, 0x48322076, 0xB26B2572, 0xFB575855, 0x2013DF3C, 0x692FA21B, + 0xD14DA3C5, 0x9871DEE2, 0x4335598B, 0x0A0924AC, 0xF05021A8, 0xB96C5C8F, 0x6228DBE6, 0x2B14A6C1, + 0x34019664, 0x7D3DEB43, 0xA6796C2A, 0xEF45110D, 0x151C1409, 0x5C20692E, 0x8764EE47, 0xCE589360, + 0x763A92BE, 0x3F06EF99, 0xE44268F0, 0xAD7E15D7, 0x572710D3, 0x1E1B6DF4, 0xC55FEA9D, 0x8C6397BA, + 0xB0779FD0, 0xF94BE2F7, 0x220F659E, 0x6B3318B9, 0x916A1DBD, 0xD856609A, 0x0312E7F3, 0x4A2E9AD4, + 0xF24C9B0A, 0xBB70E62D, 0x60346144, 0x29081C63, 0xD3511967, 0x9A6D6440, 0x4129E329, 0x08159E0E, + 0x3901F3FD, 0x703D8EDA, 0xAB7909B3, 0xE2457494, 0x181C7190, 0x51200CB7, 0x8A648BDE, 0xC358F6F9, + 0x7B3AF727, 0x32068A00, 0xE9420D69, 0xA07E704E, 0x5A27754A, 0x131B086D, 0xC85F8F04, 0x8163F223, + 0xBD77FA49, 0xF44B876E, 0x2F0F0007, 0x66337D20, 0x9C6A7824, 0xD5560503, 0x0E12826A, 0x472EFF4D, + 0xFF4CFE93, 0xB67083B4, 0x6D3404DD, 0x240879FA, 0xDE517CFE, 0x976D01D9, 0x4C2986B0, 0x0515FB97, + 0x2E015D56, 0x673D2071, 0xBC79A718, 0xF545DA3F, 0x0F1CDF3B, 0x4620A21C, 0x9D642575, 0xD4585852, + 0x6C3A598C, 0x250624AB, 0xFE42A3C2, 0xB77EDEE5, 0x4D27DBE1, 0x041BA6C6, 0xDF5F21AF, 0x96635C88, + 0xAA7754E2, 0xE34B29C5, 0x380FAEAC, 0x7133D38B, 0x8B6AD68F, 0xC256ABA8, 0x19122CC1, 0x502E51E6, + 0xE84C5038, 0xA1702D1F, 0x7A34AA76, 0x3308D751, 0xC951D255, 0x806DAF72, 0x5B29281B, 0x1215553C, + 0x230138CF, 0x6A3D45E8, 0xB179C281, 0xF845BFA6, 0x021CBAA2, 0x4B20C785, 0x906440EC, 0xD9583DCB, + 0x613A3C15, 0x28064132, 0xF342C65B, 0xBA7EBB7C, 0x4027BE78, 0x091BC35F, 0xD25F4436, 0x9B633911, + 0xA777317B, 0xEE4B4C5C, 0x350FCB35, 0x7C33B612, 0x866AB316, 0xCF56CE31, 0x14124958, 0x5D2E347F, + 0xE54C35A1, 0xAC704886, 0x7734CFEF, 0x3E08B2C8, 0xC451B7CC, 0x8D6DCAEB, 0x56294D82, 0x1F1530A5 +}}; + +#define CRC32_UPD(crc, n) \ + (crc32c_tables[(n)][(crc) & 0xFF] ^ \ + crc32c_tables[(n)-1][((crc) >> 8) & 0xFF]) + +static inline uint32_t +crc32c_1byte(uint8_t data, uint32_t init_val) +{ + uint32_t crc; + crc = init_val; + crc ^= data; + + return crc32c_tables[0][crc & 0xff] ^ (crc >> 8); +} + +static inline uint32_t +crc32c_2bytes(uint16_t data, uint32_t init_val) +{ + uint32_t crc; + crc = init_val; + crc ^= data; + + crc = CRC32_UPD(crc, 1) ^ (crc >> 16); + + return crc; +} + +static inline uint32_t +crc32c_1word(uint32_t data, uint32_t init_val) +{ + uint32_t crc, term1, term2; + crc = init_val; + crc ^= data; + + term1 = CRC32_UPD(crc, 3); + term2 = crc >> 16; + crc = term1 ^ CRC32_UPD(term2, 1); + + return crc; +} + +static inline uint32_t +crc32c_2words(uint64_t data, uint32_t init_val) +{ + union { + uint64_t u64; + uint32_t u32[2]; + } d; + d.u64 = data; + + uint32_t crc, term1, term2; + + crc = init_val; + crc ^= d.u32[0]; + + term1 = CRC32_UPD(crc, 7); + term2 = crc >> 16; + crc = term1 ^ CRC32_UPD(term2, 5); + term1 = CRC32_UPD(d.u32[1], 3); + term2 = d.u32[1] >> 16; + crc ^= term1 ^ CRC32_UPD(term2, 1); + + return crc; +} + +#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64) +static inline uint32_t +crc32c_sse42_u8(uint8_t data, uint32_t init_val) +{ + __asm__ volatile( + "crc32b %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} + +static inline uint32_t +crc32c_sse42_u16(uint16_t data, uint32_t init_val) +{ + __asm__ volatile( + "crc32w %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} + +static inline uint32_t +crc32c_sse42_u32(uint32_t data, uint32_t init_val) +{ + __asm__ volatile( + "crc32l %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} + +static inline uint32_t +crc32c_sse42_u64_mimic(uint64_t data, uint64_t init_val) +{ + union { + uint32_t u32[2]; + uint64_t u64; + } d; + + d.u64 = data; + init_val = crc32c_sse42_u32(d.u32[0], init_val); + init_val = crc32c_sse42_u32(d.u32[1], init_val); + return init_val; +} +#endif + +#ifdef RTE_ARCH_X86_64 +static inline uint32_t +crc32c_sse42_u64(uint64_t data, uint64_t init_val) +{ + __asm__ volatile( + "crc32q %[data], %[init_val];" + : [init_val] "+r" (init_val) + : [data] "rm" (data)); + return init_val; +} +#endif + +#define CRC32_SW (1U << 0) +#define CRC32_SSE42 (1U << 1) +#define CRC32_x64 (1U << 2) +#define CRC32_SSE42_x64 (CRC32_x64|CRC32_SSE42) +#define CRC32_ARM64 (1U << 3) + +static uint8_t crc32_alg = CRC32_SW; + +#if defined(RTE_ARCH_ARM64) +#include "rte_crc_arm64.h" +#else + +/** + * Allow or disallow use of SSE4.2 instrinsics for CRC32 hash + * calculation. + * + * @param alg + * An OR of following flags: + * - (CRC32_SW) Don't use SSE4.2 intrinsics + * - (CRC32_SSE42) Use SSE4.2 intrinsics if available + * - (CRC32_SSE42_x64) Use 64-bit SSE4.2 intrinsic if available (default) + * + */ +static inline void +rte_hash_crc_set_alg(uint8_t alg) +{ + switch (alg) { +#if defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_64) + case CRC32_SSE42_x64: + if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_EM64T)) + alg = CRC32_SSE42; + case CRC32_SSE42: + if (! rte_cpu_get_flag_enabled(RTE_CPUFLAG_SSE4_2)) + alg = CRC32_SW; +#endif + case CRC32_SW: + crc32_alg = alg; + default: + break; + } +} + +/* Setting the best available algorithm */ +static inline void __attribute__((constructor)) +rte_hash_crc_init_alg(void) +{ + rte_hash_crc_set_alg(CRC32_SSE42_x64); +} + +/** + * Use single crc32 instruction to perform a hash on a byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_1byte(uint8_t data, uint32_t init_val) +{ +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u8(data, init_val); +#endif + + return crc32c_1byte(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 2 bytes value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_2byte(uint16_t data, uint32_t init_val) +{ +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u16(data, init_val); +#endif + + return crc32c_2bytes(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 4 byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_4byte(uint32_t data, uint32_t init_val) +{ +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u32(data, init_val); +#endif + + return crc32c_1word(data, init_val); +} + +/** + * Use single crc32 instruction to perform a hash on a 8 byte value. + * Fall back to software crc32 implementation in case SSE4.2 is + * not supported + * + * @param data + * Data to perform hash on. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc_8byte(uint64_t data, uint32_t init_val) +{ +#ifdef RTE_ARCH_X86_64 + if (likely(crc32_alg == CRC32_SSE42_x64)) + return crc32c_sse42_u64(data, init_val); +#endif + +#if defined RTE_ARCH_I686 || defined RTE_ARCH_X86_64 + if (likely(crc32_alg & CRC32_SSE42)) + return crc32c_sse42_u64_mimic(data, init_val); +#endif + + return crc32c_2words(data, init_val); +} + +#endif + +/** + * Calculate CRC32 hash on user-supplied byte array. + * + * @param data + * Data to perform hash on. + * @param data_len + * How many bytes to use to calculate hash value. + * @param init_val + * Value to initialise hash generator. + * @return + * 32bit calculated hash value. + */ +static inline uint32_t +rte_hash_crc(const void *data, uint32_t data_len, uint32_t init_val) +{ + unsigned i; + uintptr_t pd = (uintptr_t) data; + + for (i = 0; i < data_len / 8; i++) { + init_val = rte_hash_crc_8byte(*(const uint64_t *)pd, init_val); + pd += 8; + } + + if (data_len & 0x4) { + init_val = rte_hash_crc_4byte(*(const uint32_t *)pd, init_val); + pd += 4; + } + + if (data_len & 0x2) { + init_val = rte_hash_crc_2byte(*(const uint16_t *)pd, init_val); + pd += 2; + } + + if (data_len & 0x1) + init_val = rte_hash_crc_1byte(*(const uint8_t *)pd, init_val); + + return init_val; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HASH_CRC_H_ */ diff --git a/lib/librte_hash/rte_hash_version.map b/lib/librte_hash/rte_hash_version.map new file mode 100644 index 00000000..4f25436e --- /dev/null +++ b/lib/librte_hash/rte_hash_version.map @@ -0,0 +1,40 @@ +DPDK_2.0 { + global: + + rte_fbk_hash_create; + rte_fbk_hash_find_existing; + rte_fbk_hash_free; + rte_hash_add_key; + rte_hash_add_key_with_hash; + rte_hash_create; + rte_hash_del_key; + rte_hash_del_key_with_hash; + rte_hash_find_existing; + rte_hash_free; + rte_hash_hash; + rte_hash_lookup; + rte_hash_lookup_bulk; + rte_hash_lookup_with_hash; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_hash_add_key_data; + rte_hash_add_key_with_hash_data; + rte_hash_iterate; + rte_hash_lookup_bulk_data; + rte_hash_lookup_data; + rte_hash_lookup_with_hash_data; + rte_hash_reset; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_hash_set_cmp_func; + +} DPDK_2.1; diff --git a/lib/librte_hash/rte_jhash.h b/lib/librte_hash/rte_jhash.h new file mode 100644 index 00000000..207478c2 --- /dev/null +++ b/lib/librte_hash/rte_jhash.h @@ -0,0 +1,410 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_JHASH_H +#define _RTE_JHASH_H + +/** + * @file + * + * jhash functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +#include +#include + +/* jhash.h: Jenkins hash support. + * + * Copyright (C) 2006 Bob Jenkins (bob_jenkins@burtleburtle.net) + * + * http://burtleburtle.net/bob/hash/ + * + * These are the credits from Bob's sources: + * + * lookup3.c, by Bob Jenkins, May 2006, Public Domain. + * + * These are functions for producing 32-bit hashes for hash table lookup. + * hashword(), hashlittle(), hashlittle2(), hashbig(), mix(), and final() + * are externally useful functions. Routines to test the hash are included + * if SELF_TEST is defined. You can use this free for any purpose. It's in + * the public domain. It has no warranty. + * + * $FreeBSD$ + */ + +#define rot(x, k) (((x) << (k)) | ((x) >> (32-(k)))) + +/** @internal Internal function. NOTE: Arguments are modified. */ +#define __rte_jhash_mix(a, b, c) do { \ + a -= c; a ^= rot(c, 4); c += b; \ + b -= a; b ^= rot(a, 6); a += c; \ + c -= b; c ^= rot(b, 8); b += a; \ + a -= c; a ^= rot(c, 16); c += b; \ + b -= a; b ^= rot(a, 19); a += c; \ + c -= b; c ^= rot(b, 4); b += a; \ +} while (0) + +#define __rte_jhash_final(a, b, c) do { \ + c ^= b; c -= rot(b, 14); \ + a ^= c; a -= rot(c, 11); \ + b ^= a; b -= rot(a, 25); \ + c ^= b; c -= rot(b, 16); \ + a ^= c; a -= rot(c, 4); \ + b ^= a; b -= rot(a, 14); \ + c ^= b; c -= rot(b, 24); \ +} while (0) + +/** The golden ratio: an arbitrary value. */ +#define RTE_JHASH_GOLDEN_RATIO 0xdeadbeef + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN +#define BIT_SHIFT(x, y, k) (((x) >> (k)) | ((uint64_t)(y) << (32-(k)))) +#else +#define BIT_SHIFT(x, y, k) (((uint64_t)(x) << (k)) | ((y) >> (32-(k)))) +#endif + +#define LOWER8b_MASK rte_le_to_cpu_32(0xff) +#define LOWER16b_MASK rte_le_to_cpu_32(0xffff) +#define LOWER24b_MASK rte_le_to_cpu_32(0xffffff) + +static inline void +__rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, + uint32_t *pb, unsigned check_align) +{ + uint32_t a, b, c; + + /* Set up the internal state */ + a = b = c = RTE_JHASH_GOLDEN_RATIO + ((uint32_t)length) + *pc; + c += *pb; + + /* + * Check key alignment. For x86 architecture, first case is always optimal + * If check_align is not set, first case will be used + */ +#if defined(RTE_ARCH_X86_64) || defined(RTE_ARCH_I686) || defined(RTE_ARCH_X86_X32) + const uint32_t *k = (const uint32_t *)key; + const uint32_t s = 0; +#else + const uint32_t *k = (uint32_t *)((uintptr_t)key & (uintptr_t)~3); + const uint32_t s = ((uintptr_t)key & 3) * CHAR_BIT; +#endif + if (!check_align || s == 0) { + while (length > 12) { + a += k[0]; + b += k[1]; + c += k[2]; + + __rte_jhash_mix(a, b, c); + + k += 3; + length -= 12; + } + + switch (length) { + case 12: + c += k[2]; b += k[1]; a += k[0]; break; + case 11: + c += k[2] & LOWER24b_MASK; b += k[1]; a += k[0]; break; + case 10: + c += k[2] & LOWER16b_MASK; b += k[1]; a += k[0]; break; + case 9: + c += k[2] & LOWER8b_MASK; b += k[1]; a += k[0]; break; + case 8: + b += k[1]; a += k[0]; break; + case 7: + b += k[1] & LOWER24b_MASK; a += k[0]; break; + case 6: + b += k[1] & LOWER16b_MASK; a += k[0]; break; + case 5: + b += k[1] & LOWER8b_MASK; a += k[0]; break; + case 4: + a += k[0]; break; + case 3: + a += k[0] & LOWER24b_MASK; break; + case 2: + a += k[0] & LOWER16b_MASK; break; + case 1: + a += k[0] & LOWER8b_MASK; break; + /* zero length strings require no mixing */ + case 0: + *pc = c; + *pb = b; + return; + }; + } else { + /* all but the last block: affect some 32 bits of (a, b, c) */ + while (length > 12) { + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s); + __rte_jhash_mix(a, b, c); + + k += 3; + length -= 12; + } + + /* last block: affect all 32 bits of (c) */ + switch (length) { + case 12: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s); + break; + case 11: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER24b_MASK; + break; + case 10: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER16b_MASK; + break; + case 9: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + c += BIT_SHIFT(k[2], k[3], s) & LOWER8b_MASK; + break; + case 8: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s); + break; + case 7: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER24b_MASK; + break; + case 6: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER16b_MASK; + break; + case 5: + a += BIT_SHIFT(k[0], k[1], s); + b += BIT_SHIFT(k[1], k[2], s) & LOWER8b_MASK; + break; + case 4: + a += BIT_SHIFT(k[0], k[1], s); + break; + case 3: + a += BIT_SHIFT(k[0], k[1], s) & LOWER24b_MASK; + break; + case 2: + a += BIT_SHIFT(k[0], k[1], s) & LOWER16b_MASK; + break; + case 1: + a += BIT_SHIFT(k[0], k[1], s) & LOWER8b_MASK; + break; + /* zero length strings require no mixing */ + case 0: + *pc = c; + *pb = b; + return; + } + } + + __rte_jhash_final(a, b, c); + + *pc = c; + *pb = b; +} + +/** + * Same as rte_jhash, but takes two seeds and return two uint32_ts. + * pc and pb must be non-null, and *pc and *pb must both be initialized + * with seeds. If you pass in (*pb)=0, the output (*pc) will be + * the same as the return value from rte_jhash. + * + * @param key + * Key to calculate hash of. + * @param length + * Length of key in bytes. + * @param pc + * IN: seed OUT: primary hash value. + * @param pb + * IN: second seed OUT: secondary hash value. + */ +static inline void +rte_jhash_2hashes(const void *key, uint32_t length, uint32_t *pc, uint32_t *pb) +{ + __rte_jhash_2hashes(key, length, pc, pb, 1); +} + +/** + * Same as rte_jhash_32b, but takes two seeds and return two uint32_ts. + * pc and pb must be non-null, and *pc and *pb must both be initialized + * with seeds. If you pass in (*pb)=0, the output (*pc) will be + * the same as the return value from rte_jhash_32b. + * + * @param k + * Key to calculate hash of. + * @param length + * Length of key in units of 4 bytes. + * @param pc + * IN: seed OUT: primary hash value. + * @param pb + * IN: second seed OUT: secondary hash value. + */ +static inline void +rte_jhash_32b_2hashes(const uint32_t *k, uint32_t length, uint32_t *pc, uint32_t *pb) +{ + __rte_jhash_2hashes((const void *) k, (length << 2), pc, pb, 0); +} + +/** + * The most generic version, hashes an arbitrary sequence + * of bytes. No alignment or length assumptions are made about + * the input key. + * + * @param key + * Key to calculate hash of. + * @param length + * Length of key in bytes. + * @param initval + * Initialising value of hash. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_jhash(const void *key, uint32_t length, uint32_t initval) +{ + uint32_t initval2 = 0; + + rte_jhash_2hashes(key, length, &initval, &initval2); + + return initval; +} + +/** + * A special optimized version that handles 1 or more of uint32_ts. + * The length parameter here is the number of uint32_ts in the key. + * + * @param k + * Key to calculate hash of. + * @param length + * Length of key in units of 4 bytes. + * @param initval + * Initialising value of hash. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_jhash_32b(const uint32_t *k, uint32_t length, uint32_t initval) +{ + uint32_t initval2 = 0; + + rte_jhash_32b_2hashes(k, length, &initval, &initval2); + + return initval; +} + +static inline uint32_t +__rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval) +{ + a += RTE_JHASH_GOLDEN_RATIO + initval; + b += RTE_JHASH_GOLDEN_RATIO + initval; + c += RTE_JHASH_GOLDEN_RATIO + initval; + + __rte_jhash_final(a, b, c); + + return c; +} + +/** + * A special ultra-optimized versions that knows it is hashing exactly + * 3 words. + * + * @param a + * First word to calculate hash of. + * @param b + * Second word to calculate hash of. + * @param c + * Third word to calculate hash of. + * @param initval + * Initialising value of hash. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_jhash_3words(uint32_t a, uint32_t b, uint32_t c, uint32_t initval) +{ + return __rte_jhash_3words(a + 12, b + 12, c + 12, initval); +} + +/** + * A special ultra-optimized versions that knows it is hashing exactly + * 2 words. + * + * @param a + * First word to calculate hash of. + * @param b + * Second word to calculate hash of. + * @param initval + * Initialising value of hash. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_jhash_2words(uint32_t a, uint32_t b, uint32_t initval) +{ + return __rte_jhash_3words(a + 8, b + 8, 8, initval); +} + +/** + * A special ultra-optimized versions that knows it is hashing exactly + * 1 word. + * + * @param a + * Word to calculate hash of. + * @param initval + * Initialising value of hash. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_jhash_1word(uint32_t a, uint32_t initval) +{ + return __rte_jhash_3words(a + 4, 4, 4, initval); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_JHASH_H */ diff --git a/lib/librte_hash/rte_thash.h b/lib/librte_hash/rte_thash.h new file mode 100644 index 00000000..d98e98e7 --- /dev/null +++ b/lib/librte_hash/rte_thash.h @@ -0,0 +1,250 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Vladimir Medvedkin + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_THASH_H +#define _RTE_THASH_H + +/** + * @file + * + * toeplitz hash functions. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Software implementation of the Toeplitz hash function used by RSS. + * Can be used either for packet distribution on single queue NIC + * or for simulating of RSS computation on specific NIC (for example + * after GRE header decapsulating) + */ + +#include +#include +#include + +#ifdef __SSE3__ +#include +#endif + +#ifdef __SSE3__ +/* Byte swap mask used for converting IPv6 address + * 4-byte chunks to CPU byte order + */ +static const __m128i rte_thash_ipv6_bswap_mask = { + 0x0405060700010203ULL, 0x0C0D0E0F08090A0BULL}; +#endif + +/** + * length in dwords of input tuple to + * calculate hash of ipv4 header only + */ +#define RTE_THASH_V4_L3_LEN ((sizeof(struct rte_ipv4_tuple) - \ + sizeof(((struct rte_ipv4_tuple *)0)->sctp_tag)) / 4) + +/** + * length in dwords of input tuple to + * calculate hash of ipv4 header + + * transport header + */ +#define RTE_THASH_V4_L4_LEN ((sizeof(struct rte_ipv4_tuple)) / 4) + +/** + * length in dwords of input tuple to + * calculate hash of ipv6 header only + */ +#define RTE_THASH_V6_L3_LEN ((sizeof(struct rte_ipv6_tuple) - \ + sizeof(((struct rte_ipv6_tuple *)0)->sctp_tag)) / 4) + +/** + * length in dwords of input tuple to + * calculate hash of ipv6 header + + * transport header + */ +#define RTE_THASH_V6_L4_LEN ((sizeof(struct rte_ipv6_tuple)) / 4) + +/** + * IPv4 tuple + * addresses and ports/sctp_tag have to be CPU byte order + */ +struct rte_ipv4_tuple { + uint32_t src_addr; + uint32_t dst_addr; + union { + struct { + uint16_t dport; + uint16_t sport; + }; + uint32_t sctp_tag; + }; +}; + +/** + * IPv6 tuple + * Addresses have to be filled by rte_thash_load_v6_addr() + * ports/sctp_tag have to be CPU byte order + */ +struct rte_ipv6_tuple { + uint8_t src_addr[16]; + uint8_t dst_addr[16]; + union { + struct { + uint16_t dport; + uint16_t sport; + }; + uint32_t sctp_tag; + }; +}; + +union rte_thash_tuple { + struct rte_ipv4_tuple v4; + struct rte_ipv6_tuple v6; +#ifdef __SSE3__ +} __attribute__((aligned(XMM_SIZE))); +#else +}; +#endif + +/** + * Prepare special converted key to use with rte_softrss_be() + * @param orig + * pointer to original RSS key + * @param targ + * pointer to target RSS key + * @param len + * RSS key length + */ +static inline void +rte_convert_rss_key(const uint32_t *orig, uint32_t *targ, int len) +{ + int i; + + for (i = 0; i < (len >> 2); i++) + targ[i] = rte_be_to_cpu_32(orig[i]); +} + +/** + * Prepare and load IPv6 addresses (src and dst) + * into target tuple + * @param orig + * Pointer to ipv6 header of the original packet + * @param targ + * Pointer to rte_ipv6_tuple structure + */ +static inline void +rte_thash_load_v6_addrs(const struct ipv6_hdr *orig, union rte_thash_tuple *targ) +{ +#ifdef __SSE3__ + __m128i ipv6 = _mm_loadu_si128((const __m128i *)orig->src_addr); + *(__m128i *)targ->v6.src_addr = + _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); + ipv6 = _mm_loadu_si128((const __m128i *)orig->dst_addr); + *(__m128i *)targ->v6.dst_addr = + _mm_shuffle_epi8(ipv6, rte_thash_ipv6_bswap_mask); +#else + int i; + for (i = 0; i < 4; i++) { + *((uint32_t *)targ->v6.src_addr + i) = + rte_be_to_cpu_32(*((const uint32_t *)orig->src_addr + i)); + *((uint32_t *)targ->v6.dst_addr + i) = + rte_be_to_cpu_32(*((const uint32_t *)orig->dst_addr + i)); + } +#endif +} + +/** + * Generic implementation. Can be used with original rss_key + * @param input_tuple + * Pointer to input tuple + * @param input_len + * Length of input_tuple in 4-bytes chunks + * @param rss_key + * Pointer to RSS hash key. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_softrss(uint32_t *input_tuple, uint32_t input_len, + const uint8_t *rss_key) +{ + uint32_t i, j, ret = 0; + + for (j = 0; j < input_len; j++) { + for (i = 0; i < 32; i++) { + if (input_tuple[j] & (1 << (31 - i))) { + ret ^= rte_cpu_to_be_32(((const uint32_t *)rss_key)[j]) << i | + (uint32_t)((uint64_t)(rte_cpu_to_be_32(((const uint32_t *)rss_key)[j + 1])) >> + (32 - i)); + } + } + } + return ret; +} + +/** + * Optimized implementation. + * If you want the calculated hash value matches NIC RSS value + * you have to use special converted key with rte_convert_rss_key() fn. + * @param input_tuple + * Pointer to input tuple + * @param input_len + * Length of input_tuple in 4-bytes chunks + * @param *rss_key + * Pointer to RSS hash key. + * @return + * Calculated hash value. + */ +static inline uint32_t +rte_softrss_be(uint32_t *input_tuple, uint32_t input_len, + const uint8_t *rss_key) +{ + uint32_t i, j, ret = 0; + + for (j = 0; j < input_len; j++) { + for (i = 0; i < 32; i++) { + if (input_tuple[j] & (1 << (31 - i))) { + ret ^= ((const uint32_t *)rss_key)[j] << i | + (uint32_t)((uint64_t)(((const uint32_t *)rss_key)[j + 1]) >> (32 - i)); + } + } + } + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_THASH_H */ diff --git a/lib/librte_ip_frag/Makefile b/lib/librte_ip_frag/Makefile new file mode 100644 index 00000000..9d06780d --- /dev/null +++ b/lib/librte_ip_frag/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_ip_frag.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_ipfrag_version.map + +LIBABIVER := 1 + +#source files +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_fragmentation.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_fragmentation.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv4_reassembly.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ipv6_reassembly.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += rte_ip_frag_common.c +SRCS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += ip_frag_internal.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_IP_FRAG)-include += rte_ip_frag.h + + +# this library depends on rte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_IP_FRAG) += lib/librte_mempool lib/librte_ether + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ip_frag/ip_frag_common.h b/lib/librte_ip_frag/ip_frag_common.h new file mode 100644 index 00000000..cde6ed4a --- /dev/null +++ b/lib/librte_ip_frag/ip_frag_common.h @@ -0,0 +1,169 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _IP_FRAG_COMMON_H_ +#define _IP_FRAG_COMMON_H_ + +#include "rte_ip_frag.h" + +/* logging macros. */ +#ifdef RTE_LIBRTE_IP_FRAG_DEBUG + +#define IP_FRAG_LOG(lvl, fmt, args...) RTE_LOG(lvl, USER1, fmt, ##args) + +#define IP_FRAG_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("function %s, line%d\tassert \"" #exp "\" failed\n", \ + __func__, __LINE__); \ +} +#else +#define IP_FRAG_LOG(lvl, fmt, args...) do {} while(0) +#define IP_FRAG_ASSERT(exp) do {} while (0) +#endif /* IP_FRAG_DEBUG */ + +#define IPV4_KEYLEN 1 +#define IPV6_KEYLEN 4 + +/* helper macros */ +#define IP_FRAG_MBUF2DR(dr, mb) ((dr)->row[(dr)->cnt++] = (mb)) + +#define IPv6_KEY_BYTES(key) \ + (key)[0], (key)[1], (key)[2], (key)[3] +#define IPv6_KEY_BYTES_FMT \ + "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 "%08" PRIx64 + +/* internal functions declarations */ +struct rte_mbuf * ip_frag_process(struct ip_frag_pkt *fp, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, + uint16_t ofs, uint16_t len, uint16_t more_frags); + +struct ip_frag_pkt * ip_frag_find(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + const struct ip_frag_key *key, uint64_t tms); + +struct ip_frag_pkt * ip_frag_lookup(struct rte_ip_frag_tbl *tbl, + const struct ip_frag_key *key, uint64_t tms, + struct ip_frag_pkt **free, struct ip_frag_pkt **stale); + +/* these functions need to be declared here as ip_frag_process relies on them */ +struct rte_mbuf * ipv4_frag_reassemble(const struct ip_frag_pkt *fp); +struct rte_mbuf * ipv6_frag_reassemble(const struct ip_frag_pkt *fp); + + + +/* + * misc frag key functions + */ + +/* check if key is empty */ +static inline int +ip_frag_key_is_empty(const struct ip_frag_key * key) +{ + uint32_t i; + for (i = 0; i < RTE_MIN(key->key_len, RTE_DIM(key->src_dst)); i++) + if (key->src_dst[i] != 0) + return 0; + return 1; +} + +/* empty the key */ +static inline void +ip_frag_key_invalidate(struct ip_frag_key * key) +{ + uint32_t i; + for (i = 0; i < key->key_len; i++) + key->src_dst[i] = 0; +} + +/* compare two keys */ +static inline int +ip_frag_key_cmp(const struct ip_frag_key * k1, const struct ip_frag_key * k2) +{ + uint32_t i, val; + val = k1->id ^ k2->id; + for (i = 0; i < k1->key_len; i++) + val |= k1->src_dst[i] ^ k2->src_dst[i]; + return val; +} + +/* + * misc fragment functions + */ + +/* put fragment on death row */ +static inline void +ip_frag_free(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr) +{ + uint32_t i, k; + + k = dr->cnt; + for (i = 0; i != fp->last_idx; i++) { + if (fp->frags[i].mb != NULL) { + dr->row[k++] = fp->frags[i].mb; + fp->frags[i].mb = NULL; + } + } + + fp->last_idx = 0; + dr->cnt = k; +} + +/* if key is empty, mark key as in use */ +static inline void +ip_frag_inuse(struct rte_ip_frag_tbl *tbl, const struct ip_frag_pkt *fp) +{ + if (ip_frag_key_is_empty(&fp->key)) { + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + } +} + +/* reset the fragment */ +static inline void +ip_frag_reset(struct ip_frag_pkt *fp, uint64_t tms) +{ + static const struct ip_frag zero_frag = { + .ofs = 0, + .len = 0, + .mb = NULL, + }; + + fp->start = tms; + fp->total_size = UINT32_MAX; + fp->frag_size = 0; + fp->last_idx = IP_MIN_FRAG_NUM; + fp->frags[IP_LAST_FRAG_IDX] = zero_frag; + fp->frags[IP_FIRST_FRAG_IDX] = zero_frag; +} + +#endif /* _IP_FRAG_COMMON_H_ */ diff --git a/lib/librte_ip_frag/ip_frag_internal.c b/lib/librte_ip_frag/ip_frag_internal.c new file mode 100644 index 00000000..b679ff43 --- /dev/null +++ b/lib/librte_ip_frag/ip_frag_internal.c @@ -0,0 +1,418 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 +#include +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + +#include "ip_frag_common.h" + +#define PRIME_VALUE 0xeaad8405 + +#define IP_FRAG_TBL_POS(tbl, sig) \ + ((tbl)->pkt + ((sig) & (tbl)->entry_mask)) + +#ifdef RTE_LIBRTE_IP_FRAG_TBL_STAT +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) ((s)->f += (v)) +#else +#define IP_FRAG_TBL_STAT_UPDATE(s, f, v) do {} while (0) +#endif /* IP_FRAG_TBL_STAT */ + +/* local frag table helper functions */ +static inline void +ip_frag_tbl_del(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + struct ip_frag_pkt *fp) +{ + ip_frag_free(fp, dr); + ip_frag_key_invalidate(&fp->key); + TAILQ_REMOVE(&tbl->lru, fp, lru); + tbl->use_entries--; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, del_num, 1); +} + +static inline void +ip_frag_tbl_add(struct rte_ip_frag_tbl *tbl, struct ip_frag_pkt *fp, + const struct ip_frag_key *key, uint64_t tms) +{ + fp->key = key[0]; + ip_frag_reset(fp, tms); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + tbl->use_entries++; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, add_num, 1); +} + +static inline void +ip_frag_tbl_reuse(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + struct ip_frag_pkt *fp, uint64_t tms) +{ + ip_frag_free(fp, dr); + ip_frag_reset(fp, tms); + TAILQ_REMOVE(&tbl->lru, fp, lru); + TAILQ_INSERT_TAIL(&tbl->lru, fp, lru); + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, reuse_num, 1); +} + + +static inline void +ipv4_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) +{ + uint32_t v; + const uint32_t *p; + + p = (const uint32_t *)&key->src_dst; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + v = rte_hash_crc_4byte(p[0], PRIME_VALUE); + v = rte_hash_crc_4byte(p[1], v); + v = rte_hash_crc_4byte(key->id, v); +#else + + v = rte_jhash_3words(p[0], p[1], key->id, PRIME_VALUE); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + *v1 = v; + *v2 = (v << 7) + (v >> 14); +} + +static inline void +ipv6_frag_hash(const struct ip_frag_key *key, uint32_t *v1, uint32_t *v2) +{ + uint32_t v; + const uint32_t *p; + + p = (const uint32_t *) &key->src_dst; + +#ifdef RTE_MACHINE_CPUFLAG_SSE4_2 + v = rte_hash_crc_4byte(p[0], PRIME_VALUE); + v = rte_hash_crc_4byte(p[1], v); + v = rte_hash_crc_4byte(p[2], v); + v = rte_hash_crc_4byte(p[3], v); + v = rte_hash_crc_4byte(p[4], v); + v = rte_hash_crc_4byte(p[5], v); + v = rte_hash_crc_4byte(p[6], v); + v = rte_hash_crc_4byte(p[7], v); + v = rte_hash_crc_4byte(key->id, v); +#else + + v = rte_jhash_3words(p[0], p[1], p[2], PRIME_VALUE); + v = rte_jhash_3words(p[3], p[4], p[5], v); + v = rte_jhash_3words(p[6], p[7], key->id, v); +#endif /* RTE_MACHINE_CPUFLAG_SSE4_2 */ + + *v1 = v; + *v2 = (v << 7) + (v >> 14); +} + +struct rte_mbuf * +ip_frag_process(struct ip_frag_pkt *fp, struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint16_t ofs, uint16_t len, uint16_t more_frags) +{ + uint32_t idx; + + fp->frag_size += len; + + /* this is the first fragment. */ + if (ofs == 0) { + idx = (fp->frags[IP_FIRST_FRAG_IDX].mb == NULL) ? + IP_FIRST_FRAG_IDX : UINT32_MAX; + + /* this is the last fragment. */ + } else if (more_frags == 0) { + fp->total_size = ofs + len; + idx = (fp->frags[IP_LAST_FRAG_IDX].mb == NULL) ? + IP_LAST_FRAG_IDX : UINT32_MAX; + + /* this is the intermediate fragment. */ + } else if ((idx = fp->last_idx) < + sizeof (fp->frags) / sizeof (fp->frags[0])) { + fp->last_idx++; + } + + /* + * errorneous packet: either exceeed max allowed number of fragments, + * or duplicate first/last fragment encountered. + */ + if (idx >= sizeof (fp->frags) / sizeof (fp->frags[0])) { + + /* report an error. */ + if (fp->key.key_len == IPV4_KEYLEN) + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, fp->key.src_dst[0], fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + else + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + + /* free all fragments, invalidate the entry. */ + ip_frag_free(fp, dr); + ip_frag_key_invalidate(&fp->key); + IP_FRAG_MBUF2DR(dr, mb); + + return NULL; + } + + fp->frags[idx].ofs = ofs; + fp->frags[idx].len = len; + fp->frags[idx].mb = mb; + + mb = NULL; + + /* not all fragments are collected yet. */ + if (likely (fp->frag_size < fp->total_size)) { + return mb; + + /* if we collected all fragments, then try to reassemble. */ + } else if (fp->frag_size == fp->total_size && + fp->frags[IP_FIRST_FRAG_IDX].mb != NULL) { + if (fp->key.key_len == IPV4_KEYLEN) + mb = ipv4_frag_reassemble(fp); + else + mb = ipv6_frag_reassemble(fp); + } + + /* errorenous set of fragments. */ + if (mb == NULL) { + + /* report an error. */ + if (fp->key.key_len == IPV4_KEYLEN) + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, fp->key.src_dst[0], fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + else + IP_FRAG_LOG(DEBUG, "%s:%d invalid fragmented packet:\n" + "ipv4_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, " + "total_size: %u, frag_size: %u, last_idx: %u\n" + "first fragment: ofs: %u, len: %u\n" + "last fragment: ofs: %u, len: %u\n\n", + __func__, __LINE__, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, + fp->total_size, fp->frag_size, fp->last_idx, + fp->frags[IP_FIRST_FRAG_IDX].ofs, + fp->frags[IP_FIRST_FRAG_IDX].len, + fp->frags[IP_LAST_FRAG_IDX].ofs, + fp->frags[IP_LAST_FRAG_IDX].len); + + /* free associated resources. */ + ip_frag_free(fp, dr); + } + + /* we are done with that entry, invalidate it. */ + ip_frag_key_invalidate(&fp->key); + return mb; +} + + +/* + * Find an entry in the table for the corresponding fragment. + * If such entry is not present, then allocate a new one. + * If the entry is stale, then free and reuse it. + */ +struct ip_frag_pkt * +ip_frag_find(struct rte_ip_frag_tbl *tbl, struct rte_ip_frag_death_row *dr, + const struct ip_frag_key *key, uint64_t tms) +{ + struct ip_frag_pkt *pkt, *free, *stale, *lru; + uint64_t max_cycles; + + /* + * Actually the two line below are totally redundant. + * they are here, just to make gcc 4.6 happy. + */ + free = NULL; + stale = NULL; + max_cycles = tbl->max_cycles; + + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, find_num, 1); + + if ((pkt = ip_frag_lookup(tbl, key, tms, &free, &stale)) == NULL) { + + /*timed-out entry, free and invalidate it*/ + if (stale != NULL) { + ip_frag_tbl_del(tbl, dr, stale); + free = stale; + + /* + * we found a free entry, check if we can use it. + * If we run out of free entries in the table, then + * check if we have a timed out entry to delete. + */ + } else if (free != NULL && + tbl->max_entries <= tbl->use_entries) { + lru = TAILQ_FIRST(&tbl->lru); + if (max_cycles + lru->start < tms) { + ip_frag_tbl_del(tbl, dr, lru); + } else { + free = NULL; + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, + fail_nospace, 1); + } + } + + /* found a free entry to reuse. */ + if (free != NULL) { + ip_frag_tbl_add(tbl, free, key, tms); + pkt = free; + } + + /* + * we found the flow, but it is already timed out, + * so free associated resources, reposition it in the LRU list, + * and reuse it. + */ + } else if (max_cycles + pkt->start < tms) { + ip_frag_tbl_reuse(tbl, dr, pkt, tms); + } + + IP_FRAG_TBL_STAT_UPDATE(&tbl->stat, fail_total, (pkt == NULL)); + + tbl->last = pkt; + return pkt; +} + +struct ip_frag_pkt * +ip_frag_lookup(struct rte_ip_frag_tbl *tbl, + const struct ip_frag_key *key, uint64_t tms, + struct ip_frag_pkt **free, struct ip_frag_pkt **stale) +{ + struct ip_frag_pkt *p1, *p2; + struct ip_frag_pkt *empty, *old; + uint64_t max_cycles; + uint32_t i, assoc, sig1, sig2; + + empty = NULL; + old = NULL; + + max_cycles = tbl->max_cycles; + assoc = tbl->bucket_entries; + + if (tbl->last != NULL && ip_frag_key_cmp(key, &tbl->last->key) == 0) + return tbl->last; + + /* different hashing methods for IPv4 and IPv6 */ + if (key->key_len == IPV4_KEYLEN) + ipv4_frag_hash(key, &sig1, &sig2); + else + ipv6_frag_hash(key, &sig1, &sig2); + + p1 = IP_FRAG_TBL_POS(tbl, sig1); + p2 = IP_FRAG_TBL_POS(tbl, sig2); + + for (i = 0; i != assoc; i++) { + if (p1->key.key_len == IPV4_KEYLEN) + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line0: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p1, i, assoc, + p1[i].key.src_dst[0], p1[i].key.id, p1[i].start); + else + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line0: %p, index: %u from %u\n" + "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p1, i, assoc, + IPv6_KEY_BYTES(p1[i].key.src_dst), p1[i].key.id, p1[i].start); + + if (ip_frag_key_cmp(key, &p1[i].key) == 0) + return p1 + i; + else if (ip_frag_key_is_empty(&p1[i].key)) + empty = (empty == NULL) ? (p1 + i) : empty; + else if (max_cycles + p1[i].start < tms) + old = (old == NULL) ? (p1 + i) : old; + + if (p2->key.key_len == IPV4_KEYLEN) + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line1: %p, index: %u from %u\n" + "key: <%" PRIx64 ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p2, i, assoc, + p2[i].key.src_dst[0], p2[i].key.id, p2[i].start); + else + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt line1: %p, index: %u from %u\n" + "key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 "\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + p2, i, assoc, + IPv6_KEY_BYTES(p2[i].key.src_dst), p2[i].key.id, p2[i].start); + + if (ip_frag_key_cmp(key, &p2[i].key) == 0) + return p2 + i; + else if (ip_frag_key_is_empty(&p2[i].key)) + empty = (empty == NULL) ?( p2 + i) : empty; + else if (max_cycles + p2[i].start < tms) + old = (old == NULL) ? (p2 + i) : old; + } + + *free = empty; + *stale = old; + return NULL; +} diff --git a/lib/librte_ip_frag/rte_ip_frag.h b/lib/librte_ip_frag/rte_ip_frag.h new file mode 100644 index 00000000..92cedf2f --- /dev/null +++ b/lib/librte_ip_frag/rte_ip_frag.h @@ -0,0 +1,363 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_IP_FRAG_H_ +#define _RTE_IP_FRAG_H_ + +/** + * @file + * RTE IP Fragmentation and Reassembly + * + * Implementation of IP packet fragmentation and reassembly. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include +#include +#include +#include + +struct rte_mbuf; + +enum { + IP_LAST_FRAG_IDX, /**< index of last fragment */ + IP_FIRST_FRAG_IDX, /**< index of first fragment */ + IP_MIN_FRAG_NUM, /**< minimum number of fragments */ + IP_MAX_FRAG_NUM = RTE_LIBRTE_IP_FRAG_MAX_FRAG, + /**< maximum number of fragments per packet */ +}; + +/** @internal fragmented mbuf */ +struct ip_frag { + uint16_t ofs; /**< offset into the packet */ + uint16_t len; /**< length of fragment */ + struct rte_mbuf *mb; /**< fragment mbuf */ +}; + +/** @internal to uniquely indetify fragmented datagram. */ +struct ip_frag_key { + uint64_t src_dst[4]; /**< src address, first 8 bytes used for IPv4 */ + uint32_t id; /**< dst address */ + uint32_t key_len; /**< src/dst key length */ +}; + +/* + * @internal Fragmented packet to reassemble. + * First two entries in the frags[] array are for the last and first fragments. + */ +struct ip_frag_pkt { + TAILQ_ENTRY(ip_frag_pkt) lru; /**< LRU list */ + struct ip_frag_key key; /**< fragmentation key */ + uint64_t start; /**< creation timestamp */ + uint32_t total_size; /**< expected reassembled size */ + uint32_t frag_size; /**< size of fragments received */ + uint32_t last_idx; /**< index of next entry to fill */ + struct ip_frag frags[IP_MAX_FRAG_NUM]; /**< fragments */ +} __rte_cache_aligned; + +#define IP_FRAG_DEATH_ROW_LEN 32 /**< death row size (in packets) */ + +/** mbuf death row (packets to be freed) */ +struct rte_ip_frag_death_row { + uint32_t cnt; /**< number of mbufs currently on death row */ + struct rte_mbuf *row[IP_FRAG_DEATH_ROW_LEN * (IP_MAX_FRAG_NUM + 1)]; + /**< mbufs to be freed */ +}; + +TAILQ_HEAD(ip_pkt_list, ip_frag_pkt); /**< @internal fragments tailq */ + +/** fragmentation table statistics */ +struct ip_frag_tbl_stat { + uint64_t find_num; /**< total # of find/insert attempts. */ + uint64_t add_num; /**< # of add ops. */ + uint64_t del_num; /**< # of del ops. */ + uint64_t reuse_num; /**< # of reuse (del/add) ops. */ + uint64_t fail_total; /**< total # of add failures. */ + uint64_t fail_nospace; /**< # of 'no space' add failures. */ +} __rte_cache_aligned; + +/** fragmentation table */ +struct rte_ip_frag_tbl { + uint64_t max_cycles; /**< ttl for table entries. */ + uint32_t entry_mask; /**< hash value mask. */ + uint32_t max_entries; /**< max entries allowed. */ + uint32_t use_entries; /**< entries in use. */ + uint32_t bucket_entries; /**< hash assocaitivity. */ + uint32_t nb_entries; /**< total size of the table. */ + uint32_t nb_buckets; /**< num of associativity lines. */ + struct ip_frag_pkt *last; /**< last used entry. */ + struct ip_pkt_list lru; /**< LRU list for table entries. */ + struct ip_frag_tbl_stat stat; /**< statistics counters. */ + struct ip_frag_pkt pkt[0]; /**< hash table. */ +}; + +/** IPv6 fragment extension header */ +#define RTE_IPV6_EHDR_MF_SHIFT 0 +#define RTE_IPV6_EHDR_MF_MASK 1 +#define RTE_IPV6_EHDR_FO_SHIFT 3 +#define RTE_IPV6_EHDR_FO_MASK (~((1 << RTE_IPV6_EHDR_FO_SHIFT) - 1)) + +#define RTE_IPV6_FRAG_USED_MASK \ + (RTE_IPV6_EHDR_MF_MASK | RTE_IPV6_EHDR_FO_MASK) + +#define RTE_IPV6_GET_MF(x) ((x) & RTE_IPV6_EHDR_MF_MASK) +#define RTE_IPV6_GET_FO(x) ((x) >> RTE_IPV6_EHDR_FO_SHIFT) + +#define RTE_IPV6_SET_FRAG_DATA(fo, mf) \ + (((fo) & RTE_IPV6_EHDR_FO_MASK) | ((mf) & RTE_IPV6_EHDR_MF_MASK)) + +struct ipv6_extension_fragment { + uint8_t next_header; /**< Next header type */ + uint8_t reserved; /**< Reserved */ + uint16_t frag_data; /**< All fragmentation data */ + uint32_t id; /**< Packet ID */ +} __attribute__((__packed__)); + + + +/* + * Create a new IP fragmentation table. + * + * @param bucket_num + * Number of buckets in the hash table. + * @param bucket_entries + * Number of entries per bucket (e.g. hash associativity). + * Should be power of two. + * @param max_entries + * Maximum number of entries that could be stored in the table. + * The value should be less or equal then bucket_num * bucket_entries. + * @param max_cycles + * Maximum TTL in cycles for each fragmented packet. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA constraints. + * @return + * The pointer to the new allocated fragmentation table, on success. NULL on error. + */ +struct rte_ip_frag_tbl * rte_ip_frag_table_create(uint32_t bucket_num, + uint32_t bucket_entries, uint32_t max_entries, + uint64_t max_cycles, int socket_id); + +/* + * Free allocated IP fragmentation table. + * + * @param btl + * Fragmentation table to free. + */ +static inline void +rte_ip_frag_table_destroy( struct rte_ip_frag_tbl *tbl) +{ + rte_free(tbl); +} + +/** + * This function implements the fragmentation of IPv6 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param nb_pkts_out + * Number of fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 + * datagrams. This value includes the size of the IPv6 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * errno. + */ +int32_t +rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect); + +/* + * This function implements reassembly of fragmented IPv6 packets. + * Incoming mbuf should have its l2_len/l3_len fields setup correctly. + * + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param dr + * Death row to free buffers to + * @param mb + * Incoming mbuf with IPv6 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPv6 header. + * @param frag_hdr + * Pointer to the IPv6 fragment extension header. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occured. + * - not all fragments of the packet are collected yet. + */ +struct rte_mbuf *rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint64_t tms, struct ipv6_hdr *ip_hdr, + struct ipv6_extension_fragment *frag_hdr); + +/* + * Return a pointer to the packet's fragment header, if found. + * It only looks at the extension header that's right after the fixed IPv6 + * header, and doesn't follow the whole chain of extension headers. + * + * @param hdr + * Pointer to the IPv6 header. + * @return + * Pointer to the IPv6 fragment extension header, or NULL if it's not + * present. + */ +static inline struct ipv6_extension_fragment * +rte_ipv6_frag_get_ipv6_fragment_header(struct ipv6_hdr *hdr) +{ + if (hdr->proto == IPPROTO_FRAGMENT) { + return (struct ipv6_extension_fragment *) ++hdr; + } + else + return NULL; +} + +/** + * IPv4 fragmentation. + * + * This function implements the fragmentation of IPv4 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param nb_pkts_out + * Number of fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 + * datagrams. This value includes the size of the IPv4 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * errno. + */ +int32_t rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect); + +/* + * This function implements reassembly of fragmented IPv4 packets. + * Incoming mbufs should have its l2_len/l3_len fields setup correclty. + * + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param dr + * Death row to free buffers to + * @param mb + * Incoming mbuf with IPv4 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV4 header inside the fragment. + * @return + * Pointer to mbuf for reassebled packet, or NULL if: + * - an error occured. + * - not all fragments of the packet are collected yet. + */ +struct rte_mbuf * rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, + struct rte_mbuf *mb, uint64_t tms, struct ipv4_hdr *ip_hdr); + +/* + * Check if the IPv4 packet is fragmented + * + * @param hdr + * IPv4 header of the packet + * @return + * 1 if fragmented, 0 if not fragmented + */ +static inline int +rte_ipv4_frag_pkt_is_fragmented(const struct ipv4_hdr * hdr) { + uint16_t flag_offset, ip_flag, ip_ofs; + + flag_offset = rte_be_to_cpu_16(hdr->fragment_offset); + ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); + ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); + + return ip_flag != 0 || ip_ofs != 0; +} + +/* + * Free mbufs on a given death row. + * + * @param dr + * Death row to free mbufs in. + * @param prefetch + * How many buffers to prefetch before freeing. + */ +void rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, + uint32_t prefetch); + + +/* + * Dump fragmentation table statistics to file. + * + * @param f + * File to dump statistics to + * @param tbl + * Fragmentation table to dump statistics from + */ +void +rte_ip_frag_table_statistics_dump(FILE * f, const struct rte_ip_frag_tbl *tbl); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IP_FRAG_H_ */ diff --git a/lib/librte_ip_frag/rte_ip_frag_common.c b/lib/librte_ip_frag/rte_ip_frag_common.c new file mode 100644 index 00000000..6176ff4e --- /dev/null +++ b/lib/librte_ip_frag/rte_ip_frag_common.c @@ -0,0 +1,139 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include + +#include "ip_frag_common.h" + +#define IP_FRAG_HASH_FNUM 2 + +/* free mbufs from death row */ +void +rte_ip_frag_free_death_row(struct rte_ip_frag_death_row *dr, + uint32_t prefetch) +{ + uint32_t i, k, n; + + k = RTE_MIN(prefetch, dr->cnt); + n = dr->cnt; + + for (i = 0; i != k; i++) + rte_prefetch0(dr->row[i]); + + for (i = 0; i != n - k; i++) { + rte_prefetch0(dr->row[i + k]); + rte_pktmbuf_free(dr->row[i]); + } + + for (; i != n; i++) + rte_pktmbuf_free(dr->row[i]); + + dr->cnt = 0; +} + +/* create fragmentation table */ +struct rte_ip_frag_tbl * +rte_ip_frag_table_create(uint32_t bucket_num, uint32_t bucket_entries, + uint32_t max_entries, uint64_t max_cycles, int socket_id) +{ + struct rte_ip_frag_tbl *tbl; + size_t sz; + uint64_t nb_entries; + + nb_entries = rte_align32pow2(bucket_num); + nb_entries *= bucket_entries; + nb_entries *= IP_FRAG_HASH_FNUM; + + /* check input parameters. */ + if (rte_is_power_of_2(bucket_entries) == 0 || + nb_entries > UINT32_MAX || nb_entries == 0 || + nb_entries < max_entries) { + RTE_LOG(ERR, USER1, "%s: invalid input parameter\n", __func__); + return NULL; + } + + sz = sizeof (*tbl) + nb_entries * sizeof (tbl->pkt[0]); + if ((tbl = rte_zmalloc_socket(__func__, sz, RTE_CACHE_LINE_SIZE, + socket_id)) == NULL) { + RTE_LOG(ERR, USER1, + "%s: allocation of %zu bytes at socket %d failed do\n", + __func__, sz, socket_id); + return NULL; + } + + RTE_LOG(INFO, USER1, "%s: allocated of %zu bytes at socket %d\n", + __func__, sz, socket_id); + + tbl->max_cycles = max_cycles; + tbl->max_entries = max_entries; + tbl->nb_entries = (uint32_t)nb_entries; + tbl->nb_buckets = bucket_num; + tbl->bucket_entries = bucket_entries; + tbl->entry_mask = (tbl->nb_entries - 1) & ~(tbl->bucket_entries - 1); + + TAILQ_INIT(&(tbl->lru)); + return tbl; +} + +/* dump frag table statistics to file */ +void +rte_ip_frag_table_statistics_dump(FILE *f, const struct rte_ip_frag_tbl *tbl) +{ + uint64_t fail_total, fail_nospace; + + fail_total = tbl->stat.fail_total; + fail_nospace = tbl->stat.fail_nospace; + + fprintf(f, "max entries:\t%u;\n" + "entries in use:\t%u;\n" + "finds/inserts:\t%" PRIu64 ";\n" + "entries added:\t%" PRIu64 ";\n" + "entries deleted by timeout:\t%" PRIu64 ";\n" + "entries reused by timeout:\t%" PRIu64 ";\n" + "total add failures:\t%" PRIu64 ";\n" + "add no-space failures:\t%" PRIu64 ";\n" + "add hash-collisions failures:\t%" PRIu64 ";\n", + tbl->max_entries, + tbl->use_entries, + tbl->stat.find_num, + tbl->stat.add_num, + tbl->stat.del_num, + tbl->stat.reuse_num, + fail_total, + fail_nospace, + fail_total - fail_nospace); +} diff --git a/lib/librte_ip_frag/rte_ipfrag_version.map b/lib/librte_ip_frag/rte_ipfrag_version.map new file mode 100644 index 00000000..354fa082 --- /dev/null +++ b/lib/librte_ip_frag/rte_ipfrag_version.map @@ -0,0 +1,13 @@ +DPDK_2.0 { + global: + + rte_ip_frag_free_death_row; + rte_ip_frag_table_create; + rte_ip_frag_table_statistics_dump; + rte_ipv4_frag_reassemble_packet; + rte_ipv4_fragment_packet; + rte_ipv6_frag_reassemble_packet; + rte_ipv6_fragment_packet; + + local: *; +}; diff --git a/lib/librte_ip_frag/rte_ipv4_fragmentation.c b/lib/librte_ip_frag/rte_ipv4_fragmentation.c new file mode 100644 index 00000000..a4ed9238 --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv4_fragmentation.c @@ -0,0 +1,209 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include + +#include "ip_frag_common.h" + +/* Fragment Offset */ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_MASK (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_MASK (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_FO_MASK ((1 << IPV4_HDR_FO_SHIFT) - 1) + +static inline void __fill_ipv4hdr_frag(struct ipv4_hdr *dst, + const struct ipv4_hdr *src, uint16_t len, uint16_t fofs, + uint16_t dofs, uint32_t mf) +{ + rte_memcpy(dst, src, sizeof(*dst)); + fofs = (uint16_t)(fofs + (dofs >> IPV4_HDR_FO_SHIFT)); + fofs = (uint16_t)(fofs | mf << IPV4_HDR_MF_SHIFT); + dst->fragment_offset = rte_cpu_to_be_16(fofs); + dst->total_length = rte_cpu_to_be_16(len); + dst->hdr_checksum = 0; +} + +static inline void __free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i != num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv4 fragmentation. + * + * This function implements the fragmentation of IPv4 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv4 + * datagrams. This value includes the size of the IPv4 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * . + */ +int32_t +rte_ipv4_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv4_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, flag_offset, frag_size; + + frag_size = (uint16_t)(mtu_size - sizeof(struct ipv4_hdr)); + + /* Fragment size should be a multiply of 8. */ + IP_FRAG_ASSERT((frag_size & IPV4_HDR_FO_MASK) == 0); + + in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv4_hdr *); + flag_offset = rte_cpu_to_be_16(in_hdr->fragment_offset); + + /* If Don't Fragment flag is set */ + if (unlikely ((flag_offset & IPV4_HDR_DF_MASK) != 0)) + return -ENOTSUP; + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely(frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv4_hdr)))) + return -EINVAL; + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv4_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv4_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->data_len = sizeof(struct ipv4_hdr); + out_pkt->pkt_len = sizeof(struct ipv4_hdr); + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + out_seg_prev->next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = mtu_size - out_pkt->pkt_len; + if (len > (in_seg->data_len - in_seg_data_pos)) { + len = in_seg->data_len - in_seg_data_pos; + } + out_seg->data_off = in_seg->data_off + in_seg_data_pos; + out_seg->data_len = (uint16_t)len; + out_pkt->pkt_len = (uint16_t)(len + + out_pkt->pkt_len); + out_pkt->nb_segs += 1; + in_seg_data_pos += len; + + /* Current output packet (i.e. fragment) done ? */ + if (unlikely(out_pkt->pkt_len >= mtu_size)) + more_out_segs = 0; + + /* Current input segment done ? */ + if (unlikely(in_seg_data_pos == in_seg->data_len)) { + in_seg = in_seg->next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) + more_in_segs = 0; + } + } + + /* Build the IP header */ + + out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv4_hdr *); + + __fill_ipv4hdr_frag(out_hdr, in_hdr, + (uint16_t)out_pkt->pkt_len, + flag_offset, fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt_len - sizeof(struct ipv4_hdr)); + + out_pkt->ol_flags |= PKT_TX_IP_CKSUM; + out_pkt->l3_len = sizeof(struct ipv4_hdr); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return out_pkt_pos; +} diff --git a/lib/librte_ip_frag/rte_ipv4_reassembly.c b/lib/librte_ip_frag/rte_ipv4_reassembly.c new file mode 100644 index 00000000..26d07f9a --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv4_reassembly.c @@ -0,0 +1,184 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +#include "ip_frag_common.h" + +/* + * Reassemble fragments into one packet. + */ +struct rte_mbuf * +ipv4_frag_reassemble(const struct ip_frag_pkt *fp) +{ + struct ipv4_hdr *ip_hdr; + struct rte_mbuf *m, *prev; + uint32_t i, n, ofs, first_len; + + first_len = fp->frags[IP_FIRST_FRAG_IDX].len; + n = fp->last_idx - 1; + + /*start from the last fragment. */ + m = fp->frags[IP_LAST_FRAG_IDX].mb; + ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; + + while (ofs != first_len) { + + prev = m; + + for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { + + /* previous fragment found. */ + if(fp->frags[i].ofs + fp->frags[i].len == ofs) { + + /* adjust start of the last fragment data. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[i].mb, m); + + /* update our last fragment and offset. */ + m = fp->frags[i].mb; + ofs = fp->frags[i].ofs; + } + } + + /* error - hole in the packet. */ + if (m == prev) { + return NULL; + } + } + + /* chain with the first fragment. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + m = fp->frags[IP_FIRST_FRAG_IDX].mb; + + /* update mbuf fields for reassembled packet. */ + m->ol_flags |= PKT_TX_IP_CKSUM; + + /* update ipv4 header for the reassmebled packet */ + ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv4_hdr *, m->l2_len); + + ip_hdr->total_length = rte_cpu_to_be_16((uint16_t)(fp->total_size + + m->l3_len)); + ip_hdr->fragment_offset = (uint16_t)(ip_hdr->fragment_offset & + rte_cpu_to_be_16(IPV4_HDR_DF_FLAG)); + ip_hdr->hdr_checksum = 0; + + return m; +} + +/* + * Process new mbuf with fragment of IPV4 packet. + * Incoming mbuf should have it's l2_len/l3_len fields setuped correclty. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param mb + * Incoming mbuf with IPV4 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV4 header inside the fragment. + * @return + * Pointer to mbuf for reassebled packet, or NULL if: + * - an error occured. + * - not all fragments of the packet are collected yet. + */ +struct rte_mbuf * +rte_ipv4_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv4_hdr *ip_hdr) +{ + struct ip_frag_pkt *fp; + struct ip_frag_key key; + const unaligned_uint64_t *psd; + uint16_t ip_len; + uint16_t flag_offset, ip_ofs, ip_flag; + + flag_offset = rte_be_to_cpu_16(ip_hdr->fragment_offset); + ip_ofs = (uint16_t)(flag_offset & IPV4_HDR_OFFSET_MASK); + ip_flag = (uint16_t)(flag_offset & IPV4_HDR_MF_FLAG); + + psd = (unaligned_uint64_t *)&ip_hdr->src_addr; + /* use first 8 bytes only */ + key.src_dst[0] = psd[0]; + key.id = ip_hdr->packet_id; + key.key_len = IPV4_KEYLEN; + + ip_ofs *= IPV4_HDR_OFFSET_UNITS; + ip_len = (uint16_t)(rte_be_to_cpu_16(ip_hdr->total_length) - + mb->l3_len); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p, tms: %" PRIu64 + ", key: <%" PRIx64 ", %#x>, ofs: %u, len: %u, flags: %#x\n" + "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " + "max_entries: %u, use_entries: %u\n\n", + __func__, __LINE__, + mb, tms, key.src_dst[0], key.id, ip_ofs, ip_len, ip_flag, + tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, + tbl->use_entries); + + /* try to find/add entry into the fragment's table. */ + if ((fp = ip_frag_find(tbl, dr, &key, tms)) == NULL) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst[0], fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + + /* process the fragmented packet. */ + mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, ip_flag); + ip_frag_inuse(tbl, fp); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv4_frag_pkt: %p, key: <%" PRIx64 ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, mb, + tbl, tbl->max_entries, tbl->use_entries, + fp, fp->key.src_dst[0], fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + return mb; +} diff --git a/lib/librte_ip_frag/rte_ipv6_fragmentation.c b/lib/librte_ip_frag/rte_ipv6_fragmentation.c new file mode 100644 index 00000000..1e30004f --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv6_fragmentation.c @@ -0,0 +1,207 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include + +#include "ip_frag_common.h" + +/** + * @file + * RTE IPv6 Fragmentation + * + * Implementation of IPv6 fragmentation. + * + */ + +static inline void +__fill_ipv6hdr_frag(struct ipv6_hdr *dst, + const struct ipv6_hdr *src, uint16_t len, uint16_t fofs, + uint32_t mf) +{ + struct ipv6_extension_fragment *fh; + + rte_memcpy(dst, src, sizeof(*dst)); + dst->payload_len = rte_cpu_to_be_16(len); + dst->proto = IPPROTO_FRAGMENT; + + fh = (struct ipv6_extension_fragment *) ++dst; + fh->next_header = src->proto; + fh->reserved = 0; + fh->frag_data = rte_cpu_to_be_16(RTE_IPV6_SET_FRAG_DATA(fofs, mf)); + fh->id = 0; +} + +static inline void +__free_fragments(struct rte_mbuf *mb[], uint32_t num) +{ + uint32_t i; + for (i = 0; i < num; i++) + rte_pktmbuf_free(mb[i]); +} + +/** + * IPv6 fragmentation. + * + * This function implements the fragmentation of IPv6 packets. + * + * @param pkt_in + * The input packet. + * @param pkts_out + * Array storing the output fragments. + * @param mtu_size + * Size in bytes of the Maximum Transfer Unit (MTU) for the outgoing IPv6 + * datagrams. This value includes the size of the IPv6 header. + * @param pool_direct + * MBUF pool used for allocating direct buffers for the output fragments. + * @param pool_indirect + * MBUF pool used for allocating indirect buffers for the output fragments. + * @return + * Upon successful completion - number of output fragments placed + * in the pkts_out array. + * Otherwise - (-1) * . + */ +int32_t +rte_ipv6_fragment_packet(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect) +{ + struct rte_mbuf *in_seg = NULL; + struct ipv6_hdr *in_hdr; + uint32_t out_pkt_pos, in_seg_data_pos; + uint32_t more_in_segs; + uint16_t fragment_offset, frag_size; + + frag_size = (uint16_t)(mtu_size - sizeof(struct ipv6_hdr)); + + /* Fragment size should be a multiple of 8. */ + IP_FRAG_ASSERT((frag_size & ~RTE_IPV6_EHDR_FO_MASK) == 0); + + /* Check that pkts_out is big enough to hold all fragments */ + if (unlikely (frag_size * nb_pkts_out < + (uint16_t)(pkt_in->pkt_len - sizeof (struct ipv6_hdr)))) + return -EINVAL; + + in_hdr = rte_pktmbuf_mtod(pkt_in, struct ipv6_hdr *); + + in_seg = pkt_in; + in_seg_data_pos = sizeof(struct ipv6_hdr); + out_pkt_pos = 0; + fragment_offset = 0; + + more_in_segs = 1; + while (likely(more_in_segs)) { + struct rte_mbuf *out_pkt = NULL, *out_seg_prev = NULL; + uint32_t more_out_segs; + struct ipv6_hdr *out_hdr; + + /* Allocate direct buffer */ + out_pkt = rte_pktmbuf_alloc(pool_direct); + if (unlikely(out_pkt == NULL)) { + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + + /* Reserve space for the IP header that will be built later */ + out_pkt->data_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); + out_pkt->pkt_len = sizeof(struct ipv6_hdr) + sizeof(struct ipv6_extension_fragment); + + out_seg_prev = out_pkt; + more_out_segs = 1; + while (likely(more_out_segs && more_in_segs)) { + struct rte_mbuf *out_seg = NULL; + uint32_t len; + + /* Allocate indirect buffer */ + out_seg = rte_pktmbuf_alloc(pool_indirect); + if (unlikely(out_seg == NULL)) { + rte_pktmbuf_free(out_pkt); + __free_fragments(pkts_out, out_pkt_pos); + return -ENOMEM; + } + out_seg_prev->next = out_seg; + out_seg_prev = out_seg; + + /* Prepare indirect buffer */ + rte_pktmbuf_attach(out_seg, in_seg); + len = mtu_size - out_pkt->pkt_len; + if (len > (in_seg->data_len - in_seg_data_pos)) { + len = in_seg->data_len - in_seg_data_pos; + } + out_seg->data_off = in_seg->data_off + in_seg_data_pos; + out_seg->data_len = (uint16_t)len; + out_pkt->pkt_len = (uint16_t)(len + + out_pkt->pkt_len); + out_pkt->nb_segs += 1; + in_seg_data_pos += len; + + /* Current output packet (i.e. fragment) done ? */ + if (unlikely(out_pkt->pkt_len >= mtu_size)) { + more_out_segs = 0; + } + + /* Current input segment done ? */ + if (unlikely(in_seg_data_pos == in_seg->data_len)) { + in_seg = in_seg->next; + in_seg_data_pos = 0; + + if (unlikely(in_seg == NULL)) { + more_in_segs = 0; + } + } + } + + /* Build the IP header */ + + out_hdr = rte_pktmbuf_mtod(out_pkt, struct ipv6_hdr *); + + __fill_ipv6hdr_frag(out_hdr, in_hdr, + (uint16_t) out_pkt->pkt_len - sizeof(struct ipv6_hdr), + fragment_offset, more_in_segs); + + fragment_offset = (uint16_t)(fragment_offset + + out_pkt->pkt_len - sizeof(struct ipv6_hdr) + - sizeof(struct ipv6_extension_fragment)); + + /* Write the fragment to the output list */ + pkts_out[out_pkt_pos] = out_pkt; + out_pkt_pos ++; + } + + return out_pkt_pos; +} diff --git a/lib/librte_ip_frag/rte_ipv6_reassembly.c b/lib/librte_ip_frag/rte_ipv6_reassembly.c new file mode 100644 index 00000000..d29cb1da --- /dev/null +++ b/lib/librte_ip_frag/rte_ipv6_reassembly.c @@ -0,0 +1,225 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include + +#include "ip_frag_common.h" + +/** + * @file + * IPv6 reassemble + * + * Implementation of IPv6 reassembly. + * + */ + +static inline void +ip_frag_memmove(char *dst, char *src, int len) +{ + int i; + + /* go backwards to make sure we don't overwrite anything important */ + for (i = len - 1; i >= 0; i--) + dst[i] = src[i]; +} + +/* + * Reassemble fragments into one packet. + */ +struct rte_mbuf * +ipv6_frag_reassemble(const struct ip_frag_pkt *fp) +{ + struct ipv6_hdr *ip_hdr; + struct ipv6_extension_fragment *frag_hdr; + struct rte_mbuf *m, *prev; + uint32_t i, n, ofs, first_len; + uint32_t last_len, move_len, payload_len; + + first_len = fp->frags[IP_FIRST_FRAG_IDX].len; + n = fp->last_idx - 1; + + /*start from the last fragment. */ + m = fp->frags[IP_LAST_FRAG_IDX].mb; + ofs = fp->frags[IP_LAST_FRAG_IDX].ofs; + last_len = fp->frags[IP_LAST_FRAG_IDX].len; + + payload_len = ofs + last_len; + + while (ofs != first_len) { + + prev = m; + + for (i = n; i != IP_FIRST_FRAG_IDX && ofs != first_len; i--) { + + /* previous fragment found. */ + if (fp->frags[i].ofs + fp->frags[i].len == ofs) { + + /* adjust start of the last fragment data. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[i].mb, m); + + /* update our last fragment and offset. */ + m = fp->frags[i].mb; + ofs = fp->frags[i].ofs; + } + } + + /* error - hole in the packet. */ + if (m == prev) { + return NULL; + } + } + + /* chain with the first fragment. */ + rte_pktmbuf_adj(m, (uint16_t)(m->l2_len + m->l3_len)); + rte_pktmbuf_chain(fp->frags[IP_FIRST_FRAG_IDX].mb, m); + m = fp->frags[IP_FIRST_FRAG_IDX].mb; + + /* update mbuf fields for reassembled packet. */ + m->ol_flags |= PKT_TX_IP_CKSUM; + + /* update ipv6 header for the reassembled datagram */ + ip_hdr = rte_pktmbuf_mtod_offset(m, struct ipv6_hdr *, m->l2_len); + + ip_hdr->payload_len = rte_cpu_to_be_16(payload_len); + + /* + * remove fragmentation header. note that per RFC2460, we need to update + * the last non-fragmentable header with the "next header" field to contain + * type of the first fragmentable header, but we currently don't support + * other headers, so we assume there are no other headers and thus update + * the main IPv6 header instead. + */ + move_len = m->l2_len + m->l3_len - sizeof(*frag_hdr); + frag_hdr = (struct ipv6_extension_fragment *) (ip_hdr + 1); + ip_hdr->proto = frag_hdr->next_header; + + ip_frag_memmove(rte_pktmbuf_mtod_offset(m, char *, sizeof(*frag_hdr)), + rte_pktmbuf_mtod(m, char*), move_len); + + rte_pktmbuf_adj(m, sizeof(*frag_hdr)); + + return m; +} + +/* + * Process new mbuf with fragment of IPV6 datagram. + * Incoming mbuf should have its l2_len/l3_len fields setup correctly. + * @param tbl + * Table where to lookup/add the fragmented packet. + * @param mb + * Incoming mbuf with IPV6 fragment. + * @param tms + * Fragment arrival timestamp. + * @param ip_hdr + * Pointer to the IPV6 header. + * @param frag_hdr + * Pointer to the IPV6 fragment extension header. + * @return + * Pointer to mbuf for reassembled packet, or NULL if: + * - an error occured. + * - not all fragments of the packet are collected yet. + */ +#define MORE_FRAGS(x) (((x) & 0x100) >> 8) +#define FRAG_OFFSET(x) (rte_cpu_to_be_16(x) >> 3) +struct rte_mbuf * +rte_ipv6_frag_reassemble_packet(struct rte_ip_frag_tbl *tbl, + struct rte_ip_frag_death_row *dr, struct rte_mbuf *mb, uint64_t tms, + struct ipv6_hdr *ip_hdr, struct ipv6_extension_fragment *frag_hdr) +{ + struct ip_frag_pkt *fp; + struct ip_frag_key key; + uint16_t ip_len, ip_ofs; + + rte_memcpy(&key.src_dst[0], ip_hdr->src_addr, 16); + rte_memcpy(&key.src_dst[2], ip_hdr->dst_addr, 16); + + key.id = frag_hdr->id; + key.key_len = IPV6_KEYLEN; + + ip_ofs = FRAG_OFFSET(frag_hdr->frag_data) * 8; + + /* + * as per RFC2460, payload length contains all extension headers as well. + * since we don't support anything but frag headers, this is what we remove + * from the payload len. + */ + ip_len = rte_be_to_cpu_16(ip_hdr->payload_len) - sizeof(*frag_hdr); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p, tms: %" PRIu64 + ", key: <" IPv6_KEY_BYTES_FMT ", %#x>, ofs: %u, len: %u, flags: %#x\n" + "tbl: %p, max_cycles: %" PRIu64 ", entry_mask: %#x, " + "max_entries: %u, use_entries: %u\n\n", + __func__, __LINE__, + mb, tms, IPv6_KEY_BYTES(key.src_dst), key.id, ip_ofs, ip_len, + RTE_IPV6_GET_MF(frag_hdr->frag_data), + tbl, tbl->max_cycles, tbl->entry_mask, tbl->max_entries, + tbl->use_entries); + + /* try to find/add entry into the fragment's table. */ + fp = ip_frag_find(tbl, dr, &key, tms); + if (fp == NULL) { + IP_FRAG_MBUF2DR(dr, mb); + return NULL; + } + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, + tbl, tbl->max_entries, tbl->use_entries, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + + /* process the fragmented packet. */ + mb = ip_frag_process(fp, dr, mb, ip_ofs, ip_len, + MORE_FRAGS(frag_hdr->frag_data)); + ip_frag_inuse(tbl, fp); + + IP_FRAG_LOG(DEBUG, "%s:%d:\n" + "mbuf: %p\n" + "tbl: %p, max_entries: %u, use_entries: %u\n" + "ipv6_frag_pkt: %p, key: <" IPv6_KEY_BYTES_FMT ", %#x>, start: %" PRIu64 + ", total_size: %u, frag_size: %u, last_idx: %u\n\n", + __func__, __LINE__, mb, + tbl, tbl->max_entries, tbl->use_entries, + fp, IPv6_KEY_BYTES(fp->key.src_dst), fp->key.id, fp->start, + fp->total_size, fp->frag_size, fp->last_idx); + + return mb; +} diff --git a/lib/librte_ivshmem/Makefile b/lib/librte_ivshmem/Makefile new file mode 100644 index 00000000..16defdba --- /dev/null +++ b/lib/librte_ivshmem/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_ivshmem.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_ivshmem_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_IVSHMEM) := rte_ivshmem.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_IVSHMEM)-include := rte_ivshmem.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_IVSHMEM) += lib/librte_mempool + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ivshmem/rte_ivshmem.c b/lib/librte_ivshmem/rte_ivshmem.c new file mode 100644 index 00000000..c8b332ce --- /dev/null +++ b/lib/librte_ivshmem/rte_ivshmem.c @@ -0,0 +1,905 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ivshmem.h" + +#define IVSHMEM_CONFIG_FILE_FMT "/var/run/.dpdk_ivshmem_metadata_%s" +#define IVSHMEM_QEMU_CMD_LINE_HEADER_FMT "-device ivshmem,size=%" PRIu64 "M,shm=fd%s" +#define IVSHMEM_QEMU_CMD_FD_FMT ":%s:0x%" PRIx64 ":0x%" PRIx64 +#define IVSHMEM_QEMU_CMDLINE_BUFSIZE 1024 +#define IVSHMEM_MAX_PAGES (1 << 12) +#define adjacent(x,y) (((x).phys_addr+(x).len)==(y).phys_addr) +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define GET_PAGEMAP_ADDR(in,addr,dlm,err) \ +{ \ + char *end; \ + errno = 0; \ + addr = strtoull((in), &end, 16); \ + if (errno != 0 || *end != (dlm)) { \ + RTE_LOG(ERR, EAL, err); \ + goto error; \ + } \ + (in) = end + 1; \ +} + +static int pagesz; + +struct memseg_cache_entry { + char filepath[PATH_MAX]; + uint64_t offset; + uint64_t len; +}; + +struct ivshmem_config { + struct rte_ivshmem_metadata * metadata; + struct memseg_cache_entry memseg_cache[IVSHMEM_MAX_PAGES]; + /**< account for multiple files per segment case */ + struct flock lock; + rte_spinlock_t sl; +}; + +static struct ivshmem_config +ivshmem_global_config[RTE_LIBRTE_IVSHMEM_MAX_METADATA_FILES]; + +static rte_spinlock_t global_cfg_sl; + +static struct ivshmem_config * +get_config_by_name(const char * name) +{ + struct rte_ivshmem_metadata * config; + unsigned i; + + for (i = 0; i < RTE_DIM(ivshmem_global_config); i++) { + config = ivshmem_global_config[i].metadata; + if (config == NULL) + return NULL; + if (strncmp(name, config->name, IVSHMEM_NAME_LEN) == 0) + return &ivshmem_global_config[i]; + } + + return NULL; +} + +static int +overlap(const struct rte_memzone * s1, const struct rte_memzone * s2) +{ + uint64_t start1, end1, start2, end2; + + start1 = s1->addr_64; + end1 = s1->addr_64 + s1->len; + start2 = s2->addr_64; + end2 = s2->addr_64 + s2->len; + + if (start1 >= start2 && start1 < end2) + return 1; + if (start2 >= start1 && start2 < end1) + return 1; + + return 0; +} + +static struct rte_memzone * +get_memzone_by_addr(const void * addr) +{ + struct rte_memzone * tmp, * mz; + struct rte_mem_config * mcfg; + int i; + + mcfg = rte_eal_get_configuration()->mem_config; + mz = NULL; + + /* find memzone for the ring */ + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + tmp = &mcfg->memzone[i]; + + if (tmp->addr_64 == (uint64_t) addr) { + mz = tmp; + break; + } + } + + return mz; +} + +static int +entry_compare(const void * a, const void * b) +{ + const struct rte_ivshmem_metadata_entry * e1 = + (const struct rte_ivshmem_metadata_entry*) a; + const struct rte_ivshmem_metadata_entry * e2 = + (const struct rte_ivshmem_metadata_entry*) b; + + /* move unallocated zones to the end */ + if (e1->mz.addr == NULL && e2->mz.addr == NULL) + return 0; + if (e1->mz.addr == 0) + return 1; + if (e2->mz.addr == 0) + return -1; + + return e1->mz.phys_addr > e2->mz.phys_addr; +} + +/* fills hugepage cache entry for a given start virt_addr */ +static int +get_hugefile_by_virt_addr(uint64_t virt_addr, struct memseg_cache_entry * e) +{ + uint64_t start_addr, end_addr; + char *start,*path_end; + char buf[PATH_MAX*2]; + FILE *f; + + start = NULL; + path_end = NULL; + start_addr = 0; + + memset(e->filepath, 0, sizeof(e->filepath)); + + /* open /proc/self/maps */ + f = fopen("/proc/self/maps", "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "cannot open /proc/self/maps!\n"); + return -1; + } + + /* parse maps */ + while (fgets(buf, sizeof(buf), f) != NULL) { + + /* get endptr to end of start addr */ + start = buf; + + GET_PAGEMAP_ADDR(start,start_addr,'-', + "Cannot find start address in maps!\n"); + + /* if start address is bigger than our address, skip */ + if (start_addr > virt_addr) + continue; + + GET_PAGEMAP_ADDR(start,end_addr,' ', + "Cannot find end address in maps!\n"); + + /* if end address is less than our address, skip */ + if (end_addr <= virt_addr) + continue; + + /* find where the path starts */ + start = strstr(start, "/"); + + if (start == NULL) + continue; + + /* at this point, we know that this is our map. + * now let's find the file */ + path_end = strstr(start, "\n"); + break; + } + + if (path_end == NULL) { + RTE_LOG(ERR, EAL, "Hugefile path not found!\n"); + goto error; + } + + /* calculate offset and copy the file path */ + snprintf(e->filepath, RTE_PTR_DIFF(path_end, start) + 1, "%s", start); + + e->offset = virt_addr - start_addr; + + fclose(f); + + return 0; +error: + fclose(f); + return -1; +} + +/* + * This is a complex function. What it does is the following: + * 1. Goes through metadata and gets list of hugepages involved + * 2. Sorts the hugepages by size (1G first) + * 3. Goes through metadata again and writes correct offsets + * 4. Goes through pages and finds out their filenames, offsets etc. + */ +static int +build_config(struct rte_ivshmem_metadata * metadata) +{ + struct rte_ivshmem_metadata_entry * e_local; + struct memseg_cache_entry * ms_local; + struct rte_memseg pages[IVSHMEM_MAX_PAGES]; + struct rte_ivshmem_metadata_entry *entry; + struct memseg_cache_entry * c_entry, * prev_entry; + struct ivshmem_config * config; + unsigned i, j, mz_iter, ms_iter; + uint64_t biggest_len; + int biggest_idx; + + /* return error if we try to use an unknown config file */ + config = get_config_by_name(metadata->name); + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", metadata->name); + goto fail_e; + } + + memset(pages, 0, sizeof(pages)); + + e_local = malloc(sizeof(config->metadata->entry)); + if (e_local == NULL) + goto fail_e; + ms_local = malloc(sizeof(config->memseg_cache)); + if (ms_local == NULL) + goto fail_ms; + + + /* make local copies before doing anything */ + memcpy(e_local, config->metadata->entry, sizeof(config->metadata->entry)); + memcpy(ms_local, config->memseg_cache, sizeof(config->memseg_cache)); + + qsort(e_local, RTE_DIM(config->metadata->entry), sizeof(struct rte_ivshmem_metadata_entry), + entry_compare); + + /* first pass - collect all huge pages */ + for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { + + entry = &e_local[mz_iter]; + + uint64_t start_addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, + entry->mz.hugepage_sz); + uint64_t offset = entry->mz.addr_64 - start_addr; + uint64_t len = RTE_ALIGN_CEIL(entry->mz.len + offset, + entry->mz.hugepage_sz); + + if (entry->mz.addr_64 == 0 || start_addr == 0 || len == 0) + continue; + + int start_page; + + /* find first unused page - mz are phys_addr sorted so we don't have to + * look out for holes */ + for (i = 0; i < RTE_DIM(pages); i++) { + + /* skip if we already have this page */ + if (pages[i].addr_64 == start_addr) { + start_addr += entry->mz.hugepage_sz; + len -= entry->mz.hugepage_sz; + continue; + } + /* we found a new page */ + else if (pages[i].addr_64 == 0) { + start_page = i; + break; + } + } + if (i == RTE_DIM(pages)) { + RTE_LOG(ERR, EAL, "Cannot find unused page!\n"); + goto fail; + } + + /* populate however many pages the memzone has */ + for (i = start_page; i < RTE_DIM(pages) && len != 0; i++) { + + pages[i].addr_64 = start_addr; + pages[i].len = entry->mz.hugepage_sz; + start_addr += entry->mz.hugepage_sz; + len -= entry->mz.hugepage_sz; + } + /* if there's still length left */ + if (len != 0) { + RTE_LOG(ERR, EAL, "Not enough space for pages!\n"); + goto fail; + } + } + + /* second pass - sort pages by size */ + for (i = 0; i < RTE_DIM(pages); i++) { + + if (pages[i].addr == NULL) + break; + + biggest_len = 0; + biggest_idx = -1; + + /* + * browse all entries starting at 'i', and find the + * entry with the smallest addr + */ + for (j=i; j< RTE_DIM(pages); j++) { + if (pages[j].addr == NULL) + break; + if (biggest_len == 0 || + pages[j].len > biggest_len) { + biggest_len = pages[j].len; + biggest_idx = j; + } + } + + /* should not happen */ + if (biggest_idx == -1) { + RTE_LOG(ERR, EAL, "Error sorting by size!\n"); + goto fail; + } + if (i != (unsigned) biggest_idx) { + struct rte_memseg tmp; + + memcpy(&tmp, &pages[biggest_idx], sizeof(struct rte_memseg)); + + /* we don't want to break contiguousness, so instead of just + * swapping segments, we move all the preceding segments to the + * right and then put the old segment @ biggest_idx in place of + * segment @ i */ + for (j = biggest_idx - 1; j >= i; j--) { + memcpy(&pages[j+1], &pages[j], sizeof(struct rte_memseg)); + memset(&pages[j], 0, sizeof(struct rte_memseg)); + if (j == 0) + break; + } + + /* put old biggest segment to its new place */ + memcpy(&pages[i], &tmp, sizeof(struct rte_memseg)); + } + } + + /* third pass - write correct offsets */ + for (mz_iter = 0; mz_iter < RTE_DIM(config->metadata->entry); mz_iter++) { + + uint64_t offset = 0; + + entry = &e_local[mz_iter]; + + if (entry->mz.addr_64 == 0) + break; + + /* find page for current memzone */ + for (i = 0; i < RTE_DIM(pages); i++) { + /* we found our page */ + if (entry->mz.addr_64 >= pages[i].addr_64 && + entry->mz.addr_64 < pages[i].addr_64 + pages[i].len) { + entry->offset = (entry->mz.addr_64 - pages[i].addr_64) + + offset; + break; + } + offset += pages[i].len; + } + if (i == RTE_DIM(pages)) { + RTE_LOG(ERR, EAL, "Page not found!\n"); + goto fail; + } + } + + ms_iter = 0; + prev_entry = NULL; + + /* fourth pass - create proper memseg cache */ + for (i = 0; i < RTE_DIM(pages) && + ms_iter <= RTE_DIM(config->memseg_cache); i++) { + if (pages[i].addr_64 == 0) + break; + + + if (ms_iter == RTE_DIM(pages)) { + RTE_LOG(ERR, EAL, "The universe has collapsed!\n"); + goto fail; + } + + c_entry = &ms_local[ms_iter]; + c_entry->len = pages[i].len; + + if (get_hugefile_by_virt_addr(pages[i].addr_64, c_entry) < 0) + goto fail; + + /* if previous entry has the same filename and is contiguous, + * clear current entry and increase previous entry's length + */ + if (prev_entry != NULL && + strncmp(c_entry->filepath, prev_entry->filepath, + sizeof(c_entry->filepath)) == 0 && + prev_entry->offset + prev_entry->len == c_entry->offset) { + prev_entry->len += pages[i].len; + memset(c_entry, 0, sizeof(struct memseg_cache_entry)); + } + else { + prev_entry = c_entry; + ms_iter++; + } + } + + /* update current configuration with new valid data */ + memcpy(config->metadata->entry, e_local, sizeof(config->metadata->entry)); + memcpy(config->memseg_cache, ms_local, sizeof(config->memseg_cache)); + + free(ms_local); + free(e_local); + + return 0; +fail: + free(ms_local); +fail_ms: + free(e_local); +fail_e: + return -1; +} + +static int +add_memzone_to_metadata(const struct rte_memzone * mz, + struct ivshmem_config * config) +{ + struct rte_ivshmem_metadata_entry * entry; + unsigned i, idx; + struct rte_mem_config *mcfg; + + if (mz->len == 0) { + RTE_LOG(ERR, EAL, "Trying to add an empty memzone\n"); + return -1; + } + + rte_spinlock_lock(&config->sl); + + mcfg = rte_eal_get_configuration()->mem_config; + + /* it prevents the memzone being freed while we add it to the metadata */ + rte_rwlock_write_lock(&mcfg->mlock); + + /* find free slot in this config */ + for (i = 0; i < RTE_DIM(config->metadata->entry); i++) { + entry = &config->metadata->entry[i]; + + if (&entry->mz.addr_64 != 0 && overlap(mz, &entry->mz)) { + RTE_LOG(ERR, EAL, "Overlapping memzones!\n"); + goto fail; + } + + /* if addr is zero, the memzone is probably free */ + if (entry->mz.addr_64 == 0) { + RTE_LOG(DEBUG, EAL, "Adding memzone '%s' at %p to metadata %s\n", + mz->name, mz->addr, config->metadata->name); + memcpy(&entry->mz, mz, sizeof(struct rte_memzone)); + + /* run config file parser */ + if (build_config(config->metadata) < 0) + goto fail; + + break; + } + } + + /* if we reached the maximum, that means we have no place in config */ + if (i == RTE_DIM(config->metadata->entry)) { + RTE_LOG(ERR, EAL, "No space left in IVSHMEM metadata %s!\n", + config->metadata->name); + goto fail; + } + + idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); + idx = idx / sizeof(struct rte_memzone); + + /* mark the memzone not freeable */ + mcfg->memzone[idx].ioremap_addr = mz->phys_addr; + + rte_rwlock_write_unlock(&mcfg->mlock); + rte_spinlock_unlock(&config->sl); + return 0; +fail: + rte_rwlock_write_unlock(&mcfg->mlock); + rte_spinlock_unlock(&config->sl); + return -1; +} + +static int +add_ring_to_metadata(const struct rte_ring * r, + struct ivshmem_config * config) +{ + struct rte_memzone * mz; + + mz = get_memzone_by_addr(r); + + if (!mz) { + RTE_LOG(ERR, EAL, "Cannot find memzone for ring!\n"); + return -1; + } + + return add_memzone_to_metadata(mz, config); +} + +static int +add_mempool_to_metadata(const struct rte_mempool * mp, + struct ivshmem_config * config) +{ + struct rte_memzone * mz; + int ret; + + mz = get_memzone_by_addr(mp); + ret = 0; + + if (!mz) { + RTE_LOG(ERR, EAL, "Cannot find memzone for mempool!\n"); + return -1; + } + + /* mempool consists of memzone and ring */ + ret = add_memzone_to_metadata(mz, config); + if (ret < 0) + return -1; + + return add_ring_to_metadata(mp->ring, config); +} + +int +rte_ivshmem_metadata_add_ring(const struct rte_ring * r, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || r == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_ring_to_metadata(r, config); +} + +int +rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || mz == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_memzone_to_metadata(mz, config); +} + +int +rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, const char * name) +{ + struct ivshmem_config * config; + + if (name == NULL || mp == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return -1; + } + + return add_mempool_to_metadata(mp, config); +} + +static inline void +ivshmem_config_path(char *buffer, size_t bufflen, const char *name) +{ + snprintf(buffer, bufflen, IVSHMEM_CONFIG_FILE_FMT, name); +} + + + +static inline +void *ivshmem_metadata_create(const char *name, size_t size, + struct flock *lock) +{ + int retval, fd; + void *metadata_addr; + char pathname[PATH_MAX]; + + ivshmem_config_path(pathname, sizeof(pathname), name); + + fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open '%s'\n", pathname); + return NULL; + } + + size = METADATA_SIZE_ALIGNED; + + retval = fcntl(fd, F_SETLK, lock); + if (retval < 0){ + close(fd); + RTE_LOG(ERR, EAL, "Cannot create lock on '%s'. Is another " + "process using it?\n", pathname); + return NULL; + } + + retval = ftruncate(fd, size); + if (retval < 0){ + close(fd); + RTE_LOG(ERR, EAL, "Cannot resize '%s'\n", pathname); + return NULL; + } + + metadata_addr = mmap(NULL, size, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (metadata_addr == MAP_FAILED){ + RTE_LOG(ERR, EAL, "Cannot mmap memory for '%s'\n", pathname); + + /* we don't care if we can't unlock */ + fcntl(fd, F_UNLCK, lock); + close(fd); + + return NULL; + } + + return metadata_addr; +} + +int rte_ivshmem_metadata_create(const char *name) +{ + struct ivshmem_config * ivshmem_config; + unsigned index; + + if (pagesz == 0) + pagesz = getpagesize(); + + if (name == NULL) + return -1; + + rte_spinlock_lock(&global_cfg_sl); + + for (index = 0; index < RTE_DIM(ivshmem_global_config); index++) { + if (ivshmem_global_config[index].metadata == NULL) { + ivshmem_config = &ivshmem_global_config[index]; + break; + } + } + + if (index == RTE_DIM(ivshmem_global_config)) { + RTE_LOG(ERR, EAL, "Cannot create more ivshmem config files. " + "Maximum has been reached\n"); + rte_spinlock_unlock(&global_cfg_sl); + return -1; + } + + ivshmem_config->lock.l_type = F_WRLCK; + ivshmem_config->lock.l_whence = SEEK_SET; + + ivshmem_config->lock.l_start = 0; + ivshmem_config->lock.l_len = METADATA_SIZE_ALIGNED; + + ivshmem_global_config[index].metadata = ((struct rte_ivshmem_metadata *) + ivshmem_metadata_create( + name, + sizeof(struct rte_ivshmem_metadata), + &ivshmem_config->lock)); + + if (ivshmem_global_config[index].metadata == NULL) { + rte_spinlock_unlock(&global_cfg_sl); + return -1; + } + + /* Metadata setup */ + memset(ivshmem_config->metadata, 0, sizeof(struct rte_ivshmem_metadata)); + ivshmem_config->metadata->magic_number = IVSHMEM_MAGIC; + snprintf(ivshmem_config->metadata->name, + sizeof(ivshmem_config->metadata->name), "%s", name); + + rte_spinlock_unlock(&global_cfg_sl); + + return 0; +} + +int +rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, const char *name) +{ + const struct memseg_cache_entry * ms_cache, *entry; + struct ivshmem_config * config; + char cmdline[IVSHMEM_QEMU_CMDLINE_BUFSIZE], *cmdline_ptr; + char cfg_file_path[PATH_MAX]; + unsigned remaining_len, tmplen, iter; + uint64_t shared_mem_size, zero_size, total_size; + + if (buffer == NULL || name == NULL) + return -1; + + config = get_config_by_name(name); + + if (config == NULL) { + RTE_LOG(ERR, EAL, "Config %s not found!\n", name); + return -1; + } + + rte_spinlock_lock(&config->sl); + + /* prepare metadata file path */ + snprintf(cfg_file_path, sizeof(cfg_file_path), IVSHMEM_CONFIG_FILE_FMT, + config->metadata->name); + + ms_cache = config->memseg_cache; + + cmdline_ptr = cmdline; + remaining_len = sizeof(cmdline); + + shared_mem_size = 0; + iter = 0; + + while ((ms_cache[iter].len != 0) && (iter < RTE_DIM(config->metadata->entry))) { + + entry = &ms_cache[iter]; + + /* Offset and sizes within the current pathname */ + tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + entry->filepath, entry->offset, entry->len); + + shared_mem_size += entry->len; + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + iter++; + } + + total_size = rte_align64pow2(shared_mem_size + METADATA_SIZE_ALIGNED); + zero_size = total_size - shared_mem_size - METADATA_SIZE_ALIGNED; + + /* add /dev/zero to command-line to fill the space */ + tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + "/dev/zero", + (uint64_t)0x0, + zero_size); + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + /* add metadata file to the end of command-line */ + tmplen = snprintf(cmdline_ptr, remaining_len, IVSHMEM_QEMU_CMD_FD_FMT, + cfg_file_path, + (uint64_t)0x0, + METADATA_SIZE_ALIGNED); + + cmdline_ptr = RTE_PTR_ADD(cmdline_ptr, tmplen); + remaining_len -= tmplen; + + if (remaining_len == 0) { + RTE_LOG(ERR, EAL, "Command line too long!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + + /* if current length of the command line is bigger than the buffer supplied + * by the user, or if command-line is bigger than what IVSHMEM accepts */ + if ((sizeof(cmdline) - remaining_len) > size) { + RTE_LOG(ERR, EAL, "Buffer is too short!\n"); + rte_spinlock_unlock(&config->sl); + return -1; + } + /* complete the command-line */ + snprintf(buffer, size, + IVSHMEM_QEMU_CMD_LINE_HEADER_FMT, + total_size >> 20, + cmdline); + + rte_spinlock_unlock(&config->sl); + + return 0; +} + +void +rte_ivshmem_metadata_dump(FILE *f, const char *name) +{ + unsigned i = 0; + struct ivshmem_config * config; + struct rte_ivshmem_metadata_entry *entry; +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + uint64_t addr; + uint64_t end, hugepage_sz; + struct memseg_cache_entry e; +#endif + + if (name == NULL) + return; + + /* return error if we try to use an unknown config file */ + config = get_config_by_name(name); + if (config == NULL) { + RTE_LOG(ERR, EAL, "Cannot find IVSHMEM config %s!\n", name); + return; + } + + rte_spinlock_lock(&config->sl); + + entry = &config->metadata->entry[0]; + + while (entry->mz.addr != NULL && i < RTE_DIM(config->metadata->entry)) { + + fprintf(f, "Entry %u: name:<%-20s>, phys:0x%-15lx, len:0x%-15lx, " + "virt:%-15p, off:0x%-15lx\n", + i, + entry->mz.name, + entry->mz.phys_addr, + entry->mz.len, + entry->mz.addr, + entry->offset); + i++; + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + fprintf(f, "\tHugepage files:\n"); + + hugepage_sz = entry->mz.hugepage_sz; + addr = RTE_ALIGN_FLOOR(entry->mz.addr_64, hugepage_sz); + end = addr + RTE_ALIGN_CEIL(entry->mz.len + (entry->mz.addr_64 - addr), + hugepage_sz); + + for (; addr < end; addr += hugepage_sz) { + memset(&e, 0, sizeof(e)); + + get_hugefile_by_virt_addr(addr, &e); + + fprintf(f, "\t0x%"PRIx64 "-0x%" PRIx64 " offset: 0x%" PRIx64 " %s\n", + addr, addr + hugepage_sz, e.offset, e.filepath); + } +#endif + entry++; + } + + rte_spinlock_unlock(&config->sl); +} diff --git a/lib/librte_ivshmem/rte_ivshmem.h b/lib/librte_ivshmem/rte_ivshmem.h new file mode 100644 index 00000000..a5d36d6b --- /dev/null +++ b/lib/librte_ivshmem/rte_ivshmem.h @@ -0,0 +1,165 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_IVSHMEM_H_ +#define RTE_IVSHMEM_H_ + +#include +#include + +/** + * @file + * + * The RTE IVSHMEM interface provides functions to create metadata files + * describing memory segments to be shared via QEMU IVSHMEM. + */ + + +#ifdef __cplusplus +extern "C" { +#endif + +#define IVSHMEM_MAGIC 0x0BADC0DE +#define IVSHMEM_NAME_LEN 32 + +/** + * Structure that holds IVSHMEM shared metadata entry. + */ +struct rte_ivshmem_metadata_entry { + struct rte_memzone mz; /**< shared memzone */ + uint64_t offset; /**< offset of memzone within IVSHMEM device */ +}; + +/** + * Structure that holds IVSHMEM metadata. + */ +struct rte_ivshmem_metadata { + int magic_number; /**< magic number */ + char name[IVSHMEM_NAME_LEN]; /**< name of the metadata file */ + struct rte_ivshmem_metadata_entry entry[RTE_LIBRTE_IVSHMEM_MAX_ENTRIES]; + /**< metadata entries */ +}; + +/** + * Creates metadata file with a given name + * + * @param name + * Name of metadata file to be created + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_create(const char * name); + +/** + * Adds memzone to a specific metadata file + * + * @param mz + * Memzone to be added + * @param md_name + * Name of metadata file for the memzone to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_memzone(const struct rte_memzone * mz, + const char * md_name); + +/** + * Adds a ring descriptor to a specific metadata file + * + * @param r + * Ring descriptor to be added + * @param md_name + * Name of metadata file for the ring to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_ring(const struct rte_ring * r, + const char * md_name); + +/** + * Adds a mempool to a specific metadata file + * + * @param mp + * Mempool to be added + * @param md_name + * Name of metadata file for the mempool to be added to + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_add_mempool(const struct rte_mempool * mp, + const char * md_name); + + +/** + * Generates the QEMU command-line for IVSHMEM device for a given metadata file. + * This function is to be called after all the objects were added. + * + * @param buffer + * Buffer to be filled with the command line arguments. + * @param size + * Size of the buffer. + * @param name + * Name of metadata file to generate QEMU command-line parameters for + * + * @return + * - On success, zero + * - On failure, a negative value + */ +int rte_ivshmem_metadata_cmdline_generate(char *buffer, unsigned size, + const char *name); + + +/** + * Dump all metadata entries from a given metadata file to the console. + * + * @param f + * A pointer to a file for output + * @name + * Name of the metadata file to be dumped to console. + */ +void rte_ivshmem_metadata_dump(FILE *f, const char *name); + + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_IVSHMEM_H_ */ diff --git a/lib/librte_ivshmem/rte_ivshmem_version.map b/lib/librte_ivshmem/rte_ivshmem_version.map new file mode 100644 index 00000000..5a393ddc --- /dev/null +++ b/lib/librte_ivshmem/rte_ivshmem_version.map @@ -0,0 +1,12 @@ +DPDK_2.0 { + global: + + rte_ivshmem_metadata_add_mempool; + rte_ivshmem_metadata_add_memzone; + rte_ivshmem_metadata_add_ring; + rte_ivshmem_metadata_cmdline_generate; + rte_ivshmem_metadata_create; + rte_ivshmem_metadata_dump; + + local: *; +}; diff --git a/lib/librte_jobstats/Makefile b/lib/librte_jobstats/Makefile new file mode 100644 index 00000000..136a448e --- /dev/null +++ b/lib/librte_jobstats/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_jobstats.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_jobstats_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_JOBSTATS) := rte_jobstats.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_JOBSTATS)-include := rte_jobstats.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_JOBSTATS) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_jobstats/rte_jobstats.c b/lib/librte_jobstats/rte_jobstats.c new file mode 100644 index 00000000..2b42050c --- /dev/null +++ b/lib/librte_jobstats/rte_jobstats.c @@ -0,0 +1,293 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "rte_jobstats.h" + +#define ADD_TIME_MIN_MAX(obj, type, value) do { \ + typeof(value) tmp = (value); \ + (obj)->type ## _time += tmp; \ + if (tmp < (obj)->min_ ## type ## _time) \ + (obj)->min_ ## type ## _time = tmp; \ + if (tmp > (obj)->max_ ## type ## _time) \ + (obj)->max_ ## type ## _time = tmp; \ +} while (0) + +#define RESET_TIME_MIN_MAX(obj, type) do { \ + (obj)->type ## _time = 0; \ + (obj)->min_ ## type ## _time = UINT64_MAX; \ + (obj)->max_ ## type ## _time = 0; \ +} while (0) + +static inline uint64_t +get_time(void) +{ + rte_rmb(); + return rte_get_timer_cycles(); +} + +/* Those are steps used to adjust job period. + * Experiments show that for forwarding apps the up step must be less than down + * step to achieve optimal performance. + */ +#define JOB_UPDATE_STEP_UP 1 +#define JOB_UPDATE_STEP_DOWN 4 + +/* + * Default update function that implements simple period adjustment. + */ +static void +default_update_function(struct rte_jobstats *job, int64_t result) +{ + int64_t err = job->target - result; + + /* Job is happy. Nothing to do */ + if (err == 0) + return; + + if (err > 0) { + if (job->period + JOB_UPDATE_STEP_UP < job->max_period) + job->period += JOB_UPDATE_STEP_UP; + } else { + if (job->min_period + JOB_UPDATE_STEP_DOWN < job->period) + job->period -= JOB_UPDATE_STEP_DOWN; + } +} + +int +rte_jobstats_context_init(struct rte_jobstats_context *ctx) +{ + if (ctx == NULL) + return -EINVAL; + + /* Init only needed parameters. Zero out everything else. */ + memset(ctx, 0, sizeof(struct rte_jobstats_context)); + + rte_jobstats_context_reset(ctx); + + return 0; +} + +void +rte_jobstats_context_start(struct rte_jobstats_context *ctx) +{ + uint64_t now; + + ctx->loop_executed_jobs = 0; + + now = get_time(); + ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time); + ctx->state_time = now; +} + +void +rte_jobstats_context_finish(struct rte_jobstats_context *ctx) +{ + uint64_t now; + + if (likely(ctx->loop_executed_jobs)) + ctx->loop_cnt++; + + now = get_time(); + ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time); + ctx->state_time = now; +} + +void +rte_jobstats_context_reset(struct rte_jobstats_context *ctx) +{ + RESET_TIME_MIN_MAX(ctx, exec); + RESET_TIME_MIN_MAX(ctx, management); + ctx->start_time = get_time(); + ctx->state_time = ctx->start_time; + ctx->job_exec_cnt = 0; + ctx->loop_cnt = 0; +} + +void +rte_jobstats_set_target(struct rte_jobstats *job, int64_t target) +{ + job->target = target; +} + +int +rte_jobstats_start(struct rte_jobstats_context *ctx, struct rte_jobstats *job) +{ + uint64_t now; + + /* Some sanity check. */ + if (unlikely(ctx == NULL || job == NULL || job->context != NULL)) + return -EINVAL; + + /* Link job with context object. */ + job->context = ctx; + + now = get_time(); + ADD_TIME_MIN_MAX(ctx, management, now - ctx->state_time); + ctx->state_time = now; + + return 0; +} + +int +rte_jobstats_abort(struct rte_jobstats *job) +{ + struct rte_jobstats_context *ctx; + uint64_t now, exec_time; + + /* Some sanity check. */ + if (unlikely(job == NULL || job->context == NULL)) + return -EINVAL; + + ctx = job->context; + now = get_time(); + exec_time = now - ctx->state_time; + ADD_TIME_MIN_MAX(ctx, management, exec_time); + ctx->state_time = now; + job->context = NULL; + + return 0; +} + +int +rte_jobstats_finish(struct rte_jobstats *job, int64_t job_value) +{ + struct rte_jobstats_context *ctx; + uint64_t now, exec_time; + int need_update; + + /* Some sanity check. */ + if (unlikely(job == NULL || job->context == NULL)) + return -EINVAL; + + need_update = job->target != job_value; + /* Adjust period only if job is unhappy of its current period. */ + if (need_update) + (*job->update_period_cb)(job, job_value); + + ctx = job->context; + + /* Update execution time is considered as runtime so get time after it is + * executed. */ + now = get_time(); + exec_time = now - ctx->state_time; + ADD_TIME_MIN_MAX(job, exec, exec_time); + ADD_TIME_MIN_MAX(ctx, exec, exec_time); + + ctx->state_time = now; + + ctx->loop_executed_jobs++; + ctx->job_exec_cnt++; + + job->exec_cnt++; + job->context = NULL; + + return need_update; +} + +void +rte_jobstats_set_period(struct rte_jobstats *job, uint64_t period, + uint8_t saturate) +{ + if (saturate != 0) { + if (period < job->min_period) + period = job->min_period; + else if (period > job->max_period) + period = job->max_period; + } + + job->period = period; +} + +void +rte_jobstats_set_min(struct rte_jobstats *job, uint64_t period) +{ + job->min_period = period; + if (job->period < period) + job->period = period; +} + +void +rte_jobstats_set_max(struct rte_jobstats *job, uint64_t period) +{ + job->max_period = period; + if (job->period > period) + job->period = period; +} + +int +rte_jobstats_init(struct rte_jobstats *job, const char *name, + uint64_t min_period, uint64_t max_period, uint64_t initial_period, + int64_t target) +{ + if (job == NULL) + return -EINVAL; + + job->period = initial_period; + job->min_period = min_period; + job->max_period = max_period; + job->target = target; + job->update_period_cb = &default_update_function; + rte_jobstats_reset(job); + snprintf(job->name, RTE_DIM(job->name), "%s", name == NULL ? "" : name); + job->context = NULL; + + return 0; +} + +void +rte_jobstats_set_update_period_function(struct rte_jobstats *job, + rte_job_update_period_cb_t update_period_cb) +{ + if (update_period_cb == NULL) + update_period_cb = default_update_function; + + job->update_period_cb = update_period_cb; +} + +void +rte_jobstats_reset(struct rte_jobstats *job) +{ + RESET_TIME_MIN_MAX(job, exec); + job->exec_cnt = 0; +} diff --git a/lib/librte_jobstats/rte_jobstats.h b/lib/librte_jobstats/rte_jobstats.h new file mode 100644 index 00000000..c2b285f8 --- /dev/null +++ b/lib/librte_jobstats/rte_jobstats.h @@ -0,0 +1,336 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef JOBSTATS_H_ +#define JOBSTATS_H_ + +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_JOBSTATS_NAMESIZE 32 + +/* Forward declarations. */ +struct rte_jobstats_context; +struct rte_jobstats; + +/** + * This function should calculate new period and set it using + * rte_jobstats_set_period() function. Time spent in this function will be + * added to job's runtime. + * + * @param job + * The job data structure handler. + * @param job_result + * Result of calling job callback. + */ +typedef void (*rte_job_update_period_cb_t)(struct rte_jobstats *job, + int64_t job_result); + +struct rte_jobstats { + uint64_t period; + /**< Estimated period of execution. */ + + uint64_t min_period; + /**< Minimum period. */ + + uint64_t max_period; + /**< Maximum period. */ + + int64_t target; + /**< Desired value for this job. */ + + rte_job_update_period_cb_t update_period_cb; + /**< Period update callback. */ + + uint64_t exec_time; + /**< Total time (sum) that this job was executing. */ + + uint64_t min_exec_time; + /**< Minimum execute time. */ + + uint64_t max_exec_time; + /**< Minimum execute time. */ + + uint64_t exec_cnt; + /**< Execute count. */ + + char name[RTE_JOBSTATS_NAMESIZE]; + /**< Name of this job */ + + struct rte_jobstats_context *context; + /**< Job stats context object that is executing this job. */ +} __rte_cache_aligned; + +struct rte_jobstats_context { + /** Viariable holding time at different points: + * -# loop start time if loop was started but no job executed yet. + * -# job start time if job is currently executing. + * -# job finish time if job finished its execution. + * -# loop finish time if loop finished its execution. */ + uint64_t state_time; + + uint64_t loop_executed_jobs; + /**< Count of executed jobs in this loop. */ + + /* Statistics start. */ + + uint64_t exec_time; + /**< Total time taken to execute jobs, not including management time. */ + + uint64_t min_exec_time; + /**< Minimum loop execute time. */ + + uint64_t max_exec_time; + /**< Minimum loop execute time. */ + + /** + * Sum of time that is not the execute time (ex: from job finish to next + * job start). + * + * This time might be considered as overhead of library + job scheduling. + */ + uint64_t management_time; + + uint64_t min_management_time; + /**< Minimum management time */ + + uint64_t max_management_time; + /**< Maximum management time */ + + uint64_t start_time; + /**< Time since last reset stats. */ + + uint64_t job_exec_cnt; + /**< Total count of executed jobs. */ + + uint64_t loop_cnt; + /**< Total count of executed loops with at least one executed job. */ +} __rte_cache_aligned; + +/** + * Initialize given context object with default values. + * + * @param ctx + * Job stats context object to initialize. + * + * @return + * 0 on success + * -EINVAL if *ctx* is NULL + */ +int +rte_jobstats_context_init(struct rte_jobstats_context *ctx); + +/** + * Mark that new set of jobs start executing. + * + * @param ctx + * Job stats context object. + */ +void +rte_jobstats_context_start(struct rte_jobstats_context *ctx); + +/** + * Mark that there is no more jobs ready to execute in this turn. Calculate + * stats for this loop turn. + * + * @param ctx + * Job stats context. + */ +void +rte_jobstats_context_finish(struct rte_jobstats_context *ctx); + +/** + * Function resets job context statistics. + * + * @param ctx + * Job stats context which statistics will be reset. + */ +void +rte_jobstats_context_reset(struct rte_jobstats_context *ctx); + +/** + * Initialize given job stats object. + * + * @param job + * Job object. + * @param name + * Optional job name. + * @param min_period + * Minimum period that this job can accept. + * @param max_period + * Maximum period that this job can accept. + * @param initial_period + * Initial period. It will be checked against *min_period* and *max_period*. + * @param target + * Target value that this job try to achieve. + * + * @return + * 0 on success + * -EINVAL if *job* is NULL + */ +int +rte_jobstats_init(struct rte_jobstats *job, const char *name, + uint64_t min_period, uint64_t max_period, uint64_t initial_period, + int64_t target); + +/** + * Set job desired target value. Difference between target and job value + * value must be used to properly adjust job execute period value. + * + * @param job + * The job object. + * @param target + * New target. + */ +void +rte_jobstats_set_target(struct rte_jobstats *job, int64_t target); + +/** + * Mark that *job* is starting of its execution in context of *ctx* object. + * + * @param ctx + * Job stats context. + * @param job + * Job object. + * @return + * 0 on success + * -EINVAL if *ctx* or *job* is NULL or *job* is executing in another context + * context already, + */ +int +rte_jobstats_start(struct rte_jobstats_context *ctx, struct rte_jobstats *job); + +/** + * Mark that *job* finished its execution, but time of this work will be skipped + * and added to management time. + * + * @param job + * Job object. + * + * @return + * 0 on success + * -EINVAL if job is NULL or job was not started (it have no context). + */ +int +rte_jobstats_abort(struct rte_jobstats *job); + +/** + * Mark that *job* finished its execution. Context in which it was executing + * will receive stat update. After this function call *job* object is ready to + * be executed in other context. + * + * @param job + * Job object. + * @param job_value + * Job value. Job should pass in this parameter a value that it try to optimize + * for example the number of packets it processed. + * + * @return + * 0 if job's period was not updated (job target equals *job_value*) + * 1 if job's period was updated + * -EINVAL if job is NULL or job was not started (it have no context). + */ +int +rte_jobstats_finish(struct rte_jobstats *job, int64_t job_value); + +/** + * Set execute period of given job. + * + * @param job + * The job object. + * @param period + * New period value. + * @param saturate + * If zero, skip period saturation to min, max range. + */ +void +rte_jobstats_set_period(struct rte_jobstats *job, uint64_t period, + uint8_t saturate); +/** + * Set minimum execute period of given job. Current period will be checked + * against new minimum value. + * + * @param job + * The job object. + * @param period + * New minimum period value. + */ +void +rte_jobstats_set_min(struct rte_jobstats *job, uint64_t period); +/** + * Set maximum execute period of given job. Current period will be checked + * against new maximum value. + * + * @param job + * The job object. + * @param period + * New maximum period value. + */ +void +rte_jobstats_set_max(struct rte_jobstats *job, uint64_t period); + +/** + * Set update period callback that is invoked after job finish. + * + * If application wants to do more sophisticated calculations than default + * it can provide this handler. + * + * @param job + * Job object. + * @param update_pedriod_cb + * Callback to set. If NULL restore default update function. + */ +void +rte_jobstats_set_update_period_function(struct rte_jobstats *job, + rte_job_update_period_cb_t update_period_cb); + +/** + * Function resets job statistics. + * + * @param job + * Job which statistics will be reset. + */ +void +rte_jobstats_reset(struct rte_jobstats *job); + +#ifdef __cplusplus +} +#endif + +#endif /* JOBSTATS_H_ */ diff --git a/lib/librte_jobstats/rte_jobstats_version.map b/lib/librte_jobstats/rte_jobstats_version.map new file mode 100644 index 00000000..f8944143 --- /dev/null +++ b/lib/librte_jobstats/rte_jobstats_version.map @@ -0,0 +1,26 @@ +DPDK_2.0 { + global: + + rte_jobstats_context_finish; + rte_jobstats_context_init; + rte_jobstats_context_reset; + rte_jobstats_context_start; + rte_jobstats_finish; + rte_jobstats_init; + rte_jobstats_reset; + rte_jobstats_set_max; + rte_jobstats_set_min; + rte_jobstats_set_period; + rte_jobstats_set_target; + rte_jobstats_set_update_period_function; + rte_jobstats_start; + + local: *; +}; + +DPDK_16.04 { + global: + + rte_jobstats_abort; + +} DPDK_2.0; diff --git a/lib/librte_kni/Makefile b/lib/librte_kni/Makefile new file mode 100644 index 00000000..1398164d --- /dev/null +++ b/lib/librte_kni/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_kni.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing + +EXPORT_MAP := rte_kni_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_KNI) := rte_kni.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_KNI)-include := rte_kni.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_eal lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_KNI) += lib/librte_ether + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_kni/rte_kni.c b/lib/librte_kni/rte_kni.c new file mode 100644 index 00000000..ea9baf4c --- /dev/null +++ b/lib/librte_kni/rte_kni.c @@ -0,0 +1,712 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_EXEC_ENV_LINUXAPP +#error "KNI is not supported" +#endif + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include "rte_kni_fifo.h" + +#define MAX_MBUF_BURST_NUM 32 + +/* Maximum number of ring entries */ +#define KNI_FIFO_COUNT_MAX 1024 +#define KNI_FIFO_SIZE (KNI_FIFO_COUNT_MAX * sizeof(void *) + \ + sizeof(struct rte_kni_fifo)) + +#define KNI_REQUEST_MBUF_NUM_MAX 32 + +#define KNI_MEM_CHECK(cond) do { if (cond) goto kni_fail; } while (0) + +/** + * KNI context + */ +struct rte_kni { + char name[RTE_KNI_NAMESIZE]; /**< KNI interface name */ + uint16_t group_id; /**< Group ID of KNI devices */ + uint32_t slot_id; /**< KNI pool slot ID */ + struct rte_mempool *pktmbuf_pool; /**< pkt mbuf mempool */ + unsigned mbuf_size; /**< mbuf size */ + + struct rte_kni_fifo *tx_q; /**< TX queue */ + struct rte_kni_fifo *rx_q; /**< RX queue */ + struct rte_kni_fifo *alloc_q; /**< Allocated mbufs queue */ + struct rte_kni_fifo *free_q; /**< To be freed mbufs queue */ + + /* For request & response */ + struct rte_kni_fifo *req_q; /**< Request queue */ + struct rte_kni_fifo *resp_q; /**< Response queue */ + void * sync_addr; /**< Req/Resp Mem address */ + + struct rte_kni_ops ops; /**< operations for request */ + uint8_t in_use : 1; /**< kni in use */ +}; + +enum kni_ops_status { + KNI_REQ_NO_REGISTER = 0, + KNI_REQ_REGISTERED, +}; + +/** + * KNI memzone pool slot + */ +struct rte_kni_memzone_slot { + uint32_t id; + uint8_t in_use : 1; /**< slot in use */ + + /* Memzones */ + const struct rte_memzone *m_ctx; /**< KNI ctx */ + const struct rte_memzone *m_tx_q; /**< TX queue */ + const struct rte_memzone *m_rx_q; /**< RX queue */ + const struct rte_memzone *m_alloc_q; /**< Allocated mbufs queue */ + const struct rte_memzone *m_free_q; /**< To be freed mbufs queue */ + const struct rte_memzone *m_req_q; /**< Request queue */ + const struct rte_memzone *m_resp_q; /**< Response queue */ + const struct rte_memzone *m_sync_addr; + + /* Free linked list */ + struct rte_kni_memzone_slot *next; /**< Next slot link.list */ +}; + +/** + * KNI memzone pool + */ +struct rte_kni_memzone_pool { + uint8_t initialized : 1; /**< Global KNI pool init flag */ + + uint32_t max_ifaces; /**< Max. num of KNI ifaces */ + struct rte_kni_memzone_slot *slots; /**< Pool slots */ + rte_spinlock_t mutex; /**< alloc/relase mutex */ + + /* Free memzone slots linked-list */ + struct rte_kni_memzone_slot *free; /**< First empty slot */ + struct rte_kni_memzone_slot *free_tail; /**< Last empty slot */ +}; + + +static void kni_free_mbufs(struct rte_kni *kni); +static void kni_allocate_mbufs(struct rte_kni *kni); + +static volatile int kni_fd = -1; +static struct rte_kni_memzone_pool kni_memzone_pool = { + .initialized = 0, +}; + +static const struct rte_memzone * +kni_memzone_reserve(const char *name, size_t len, int socket_id, + unsigned flags) +{ + const struct rte_memzone *mz = rte_memzone_lookup(name); + + if (mz == NULL) + mz = rte_memzone_reserve(name, len, socket_id, flags); + + return mz; +} + +/* Pool mgmt */ +static struct rte_kni_memzone_slot* +kni_memzone_pool_alloc(void) +{ + struct rte_kni_memzone_slot *slot; + + rte_spinlock_lock(&kni_memzone_pool.mutex); + + if (!kni_memzone_pool.free) { + rte_spinlock_unlock(&kni_memzone_pool.mutex); + return NULL; + } + + slot = kni_memzone_pool.free; + kni_memzone_pool.free = slot->next; + slot->in_use = 1; + + if (!kni_memzone_pool.free) + kni_memzone_pool.free_tail = NULL; + + rte_spinlock_unlock(&kni_memzone_pool.mutex); + + return slot; +} + +static void +kni_memzone_pool_release(struct rte_kni_memzone_slot *slot) +{ + rte_spinlock_lock(&kni_memzone_pool.mutex); + + if (kni_memzone_pool.free) + kni_memzone_pool.free_tail->next = slot; + else + kni_memzone_pool.free = slot; + + kni_memzone_pool.free_tail = slot; + slot->next = NULL; + slot->in_use = 0; + + rte_spinlock_unlock(&kni_memzone_pool.mutex); +} + + +/* Shall be called before any allocation happens */ +void +rte_kni_init(unsigned int max_kni_ifaces) +{ + uint32_t i; + struct rte_kni_memzone_slot *it; + const struct rte_memzone *mz; +#define OBJNAMSIZ 32 + char obj_name[OBJNAMSIZ]; + char mz_name[RTE_MEMZONE_NAMESIZE]; + + /* Immediately return if KNI is already initialized */ + if (kni_memzone_pool.initialized) { + RTE_LOG(WARNING, KNI, "Double call to rte_kni_init()"); + return; + } + + if (max_kni_ifaces == 0) { + RTE_LOG(ERR, KNI, "Invalid number of max_kni_ifaces %d\n", + max_kni_ifaces); + rte_panic("Unable to initialize KNI\n"); + } + + /* Check FD and open */ + if (kni_fd < 0) { + kni_fd = open("/dev/" KNI_DEVICE, O_RDWR); + if (kni_fd < 0) + rte_panic("Can not open /dev/%s\n", KNI_DEVICE); + } + + /* Allocate slot objects */ + kni_memzone_pool.slots = (struct rte_kni_memzone_slot *) + rte_malloc(NULL, + sizeof(struct rte_kni_memzone_slot) * + max_kni_ifaces, + 0); + KNI_MEM_CHECK(kni_memzone_pool.slots == NULL); + + /* Initialize general pool variables */ + kni_memzone_pool.initialized = 1; + kni_memzone_pool.max_ifaces = max_kni_ifaces; + kni_memzone_pool.free = &kni_memzone_pool.slots[0]; + rte_spinlock_init(&kni_memzone_pool.mutex); + + /* Pre-allocate all memzones of all the slots; panic on error */ + for (i = 0; i < max_kni_ifaces; i++) { + + /* Recover current slot */ + it = &kni_memzone_pool.slots[i]; + it->id = i; + + /* Allocate KNI context */ + snprintf(mz_name, RTE_MEMZONE_NAMESIZE, "KNI_INFO_%d", i); + mz = kni_memzone_reserve(mz_name, sizeof(struct rte_kni), + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_ctx = mz; + + /* TX RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_tx_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_tx_q = mz; + + /* RX RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_rx_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_rx_q = mz; + + /* ALLOC RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_alloc_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_alloc_q = mz; + + /* FREE RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_free_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_free_q = mz; + + /* Request RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_req_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_req_q = mz; + + /* Response RING */ + snprintf(obj_name, OBJNAMSIZ, "kni_resp_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_resp_q = mz; + + /* Req/Resp sync mem area */ + snprintf(obj_name, OBJNAMSIZ, "kni_sync_%d", i); + mz = kni_memzone_reserve(obj_name, KNI_FIFO_SIZE, + SOCKET_ID_ANY, 0); + KNI_MEM_CHECK(mz == NULL); + it->m_sync_addr = mz; + + if ((i+1) == max_kni_ifaces) { + it->next = NULL; + kni_memzone_pool.free_tail = it; + } else + it->next = &kni_memzone_pool.slots[i+1]; + } + + return; + +kni_fail: + rte_panic("Unable to allocate memory for max_kni_ifaces:%d. Increase the amount of hugepages memory\n", + max_kni_ifaces); +} + + +struct rte_kni * +rte_kni_alloc(struct rte_mempool *pktmbuf_pool, + const struct rte_kni_conf *conf, + struct rte_kni_ops *ops) +{ + int ret; + struct rte_kni_device_info dev_info; + struct rte_kni *ctx; + char intf_name[RTE_KNI_NAMESIZE]; + char mz_name[RTE_MEMZONE_NAMESIZE]; + const struct rte_memzone *mz; + struct rte_kni_memzone_slot *slot = NULL; + + if (!pktmbuf_pool || !conf || !conf->name[0]) + return NULL; + + /* Check if KNI subsystem has been initialized */ + if (kni_memzone_pool.initialized != 1) { + RTE_LOG(ERR, KNI, "KNI subsystem has not been initialized. Invoke rte_kni_init() first\n"); + return NULL; + } + + /* Get an available slot from the pool */ + slot = kni_memzone_pool_alloc(); + if (!slot) { + RTE_LOG(ERR, KNI, "Cannot allocate more KNI interfaces; increase the number of max_kni_ifaces(current %d) or release unusued ones.\n", + kni_memzone_pool.max_ifaces); + return NULL; + } + + /* Recover ctx */ + ctx = slot->m_ctx->addr; + snprintf(intf_name, RTE_KNI_NAMESIZE, "%s", conf->name); + + if (ctx->in_use) { + RTE_LOG(ERR, KNI, "KNI %s is in use\n", ctx->name); + return NULL; + } + memset(ctx, 0, sizeof(struct rte_kni)); + if (ops) + memcpy(&ctx->ops, ops, sizeof(struct rte_kni_ops)); + + memset(&dev_info, 0, sizeof(dev_info)); + dev_info.bus = conf->addr.bus; + dev_info.devid = conf->addr.devid; + dev_info.function = conf->addr.function; + dev_info.vendor_id = conf->id.vendor_id; + dev_info.device_id = conf->id.device_id; + dev_info.core_id = conf->core_id; + dev_info.force_bind = conf->force_bind; + dev_info.group_id = conf->group_id; + dev_info.mbuf_size = conf->mbuf_size; + + snprintf(ctx->name, RTE_KNI_NAMESIZE, "%s", intf_name); + snprintf(dev_info.name, RTE_KNI_NAMESIZE, "%s", intf_name); + + RTE_LOG(INFO, KNI, "pci: %02x:%02x:%02x \t %02x:%02x\n", + dev_info.bus, dev_info.devid, dev_info.function, + dev_info.vendor_id, dev_info.device_id); + /* TX RING */ + mz = slot->m_tx_q; + ctx->tx_q = mz->addr; + kni_fifo_init(ctx->tx_q, KNI_FIFO_COUNT_MAX); + dev_info.tx_phys = mz->phys_addr; + + /* RX RING */ + mz = slot->m_rx_q; + ctx->rx_q = mz->addr; + kni_fifo_init(ctx->rx_q, KNI_FIFO_COUNT_MAX); + dev_info.rx_phys = mz->phys_addr; + + /* ALLOC RING */ + mz = slot->m_alloc_q; + ctx->alloc_q = mz->addr; + kni_fifo_init(ctx->alloc_q, KNI_FIFO_COUNT_MAX); + dev_info.alloc_phys = mz->phys_addr; + + /* FREE RING */ + mz = slot->m_free_q; + ctx->free_q = mz->addr; + kni_fifo_init(ctx->free_q, KNI_FIFO_COUNT_MAX); + dev_info.free_phys = mz->phys_addr; + + /* Request RING */ + mz = slot->m_req_q; + ctx->req_q = mz->addr; + kni_fifo_init(ctx->req_q, KNI_FIFO_COUNT_MAX); + dev_info.req_phys = mz->phys_addr; + + /* Response RING */ + mz = slot->m_resp_q; + ctx->resp_q = mz->addr; + kni_fifo_init(ctx->resp_q, KNI_FIFO_COUNT_MAX); + dev_info.resp_phys = mz->phys_addr; + + /* Req/Resp sync mem area */ + mz = slot->m_sync_addr; + ctx->sync_addr = mz->addr; + dev_info.sync_va = mz->addr; + dev_info.sync_phys = mz->phys_addr; + + + /* MBUF mempool */ + snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, + pktmbuf_pool->name); + mz = rte_memzone_lookup(mz_name); + KNI_MEM_CHECK(mz == NULL); + dev_info.mbuf_va = mz->addr; + dev_info.mbuf_phys = mz->phys_addr; + ctx->pktmbuf_pool = pktmbuf_pool; + ctx->group_id = conf->group_id; + ctx->slot_id = slot->id; + ctx->mbuf_size = conf->mbuf_size; + + ret = ioctl(kni_fd, RTE_KNI_IOCTL_CREATE, &dev_info); + KNI_MEM_CHECK(ret < 0); + + ctx->in_use = 1; + + /* Allocate mbufs and then put them into alloc_q */ + kni_allocate_mbufs(ctx); + + return ctx; + +kni_fail: + if (slot) + kni_memzone_pool_release(&kni_memzone_pool.slots[slot->id]); + + return NULL; +} + +static void +kni_free_fifo(struct rte_kni_fifo *fifo) +{ + int ret; + struct rte_mbuf *pkt; + + do { + ret = kni_fifo_get(fifo, (void **)&pkt, 1); + if (ret) + rte_pktmbuf_free(pkt); + } while (ret); +} + +int +rte_kni_release(struct rte_kni *kni) +{ + struct rte_kni_device_info dev_info; + uint32_t slot_id; + + if (!kni || !kni->in_use) + return -1; + + snprintf(dev_info.name, sizeof(dev_info.name), "%s", kni->name); + if (ioctl(kni_fd, RTE_KNI_IOCTL_RELEASE, &dev_info) < 0) { + RTE_LOG(ERR, KNI, "Fail to release kni device\n"); + return -1; + } + + /* mbufs in all fifo should be released, except request/response */ + kni_free_fifo(kni->tx_q); + kni_free_fifo(kni->rx_q); + kni_free_fifo(kni->alloc_q); + kni_free_fifo(kni->free_q); + + slot_id = kni->slot_id; + + /* Memset the KNI struct */ + memset(kni, 0, sizeof(struct rte_kni)); + + /* Release memzone */ + if (slot_id > kni_memzone_pool.max_ifaces) { + rte_panic("KNI pool: corrupted slot ID: %d, max: %d\n", + slot_id, kni_memzone_pool.max_ifaces); + } + kni_memzone_pool_release(&kni_memzone_pool.slots[slot_id]); + + return 0; +} + +int +rte_kni_handle_request(struct rte_kni *kni) +{ + unsigned ret; + struct rte_kni_request *req; + + if (kni == NULL) + return -1; + + /* Get request mbuf */ + ret = kni_fifo_get(kni->req_q, (void **)&req, 1); + if (ret != 1) + return 0; /* It is OK of can not getting the request mbuf */ + + if (req != kni->sync_addr) { + rte_panic("Wrong req pointer %p\n", req); + } + + /* Analyze the request and call the relevant actions for it */ + switch (req->req_id) { + case RTE_KNI_REQ_CHANGE_MTU: /* Change MTU */ + if (kni->ops.change_mtu) + req->result = kni->ops.change_mtu(kni->ops.port_id, + req->new_mtu); + break; + case RTE_KNI_REQ_CFG_NETWORK_IF: /* Set network interface up/down */ + if (kni->ops.config_network_if) + req->result = kni->ops.config_network_if(\ + kni->ops.port_id, req->if_up); + break; + default: + RTE_LOG(ERR, KNI, "Unknown request id %u\n", req->req_id); + req->result = -EINVAL; + break; + } + + /* Construct response mbuf and put it back to resp_q */ + ret = kni_fifo_put(kni->resp_q, (void **)&req, 1); + if (ret != 1) { + RTE_LOG(ERR, KNI, "Fail to put the muf back to resp_q\n"); + return -1; /* It is an error of can't putting the mbuf back */ + } + + return 0; +} + +unsigned +rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num) +{ + unsigned ret = kni_fifo_put(kni->rx_q, (void **)mbufs, num); + + /* Get mbufs from free_q and then free them */ + kni_free_mbufs(kni); + + return ret; +} + +unsigned +rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, unsigned num) +{ + unsigned ret = kni_fifo_get(kni->tx_q, (void **)mbufs, num); + + /* If buffers removed, allocate mbufs and then put them into alloc_q */ + if (ret) + kni_allocate_mbufs(kni); + + return ret; +} + +static void +kni_free_mbufs(struct rte_kni *kni) +{ + int i, ret; + struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM]; + + ret = kni_fifo_get(kni->free_q, (void **)pkts, MAX_MBUF_BURST_NUM); + if (likely(ret > 0)) { + for (i = 0; i < ret; i++) + rte_pktmbuf_free(pkts[i]); + } +} + +static void +kni_allocate_mbufs(struct rte_kni *kni) +{ + int i, ret; + struct rte_mbuf *pkts[MAX_MBUF_BURST_NUM]; + + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pool) != + offsetof(struct rte_kni_mbuf, pool)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, buf_addr) != + offsetof(struct rte_kni_mbuf, buf_addr)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, next) != + offsetof(struct rte_kni_mbuf, next)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_off) != + offsetof(struct rte_kni_mbuf, data_off)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, data_len) != + offsetof(struct rte_kni_mbuf, data_len)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, pkt_len) != + offsetof(struct rte_kni_mbuf, pkt_len)); + RTE_BUILD_BUG_ON(offsetof(struct rte_mbuf, ol_flags) != + offsetof(struct rte_kni_mbuf, ol_flags)); + + /* Check if pktmbuf pool has been configured */ + if (kni->pktmbuf_pool == NULL) { + RTE_LOG(ERR, KNI, "No valid mempool for allocating mbufs\n"); + return; + } + + for (i = 0; i < MAX_MBUF_BURST_NUM; i++) { + pkts[i] = rte_pktmbuf_alloc(kni->pktmbuf_pool); + if (unlikely(pkts[i] == NULL)) { + /* Out of memory */ + RTE_LOG(ERR, KNI, "Out of memory\n"); + break; + } + } + + /* No pkt mbuf alocated */ + if (i <= 0) + return; + + ret = kni_fifo_put(kni->alloc_q, (void **)pkts, i); + + /* Check if any mbufs not put into alloc_q, and then free them */ + if (ret >= 0 && ret < i && ret < MAX_MBUF_BURST_NUM) { + int j; + + for (j = ret; j < i; j++) + rte_pktmbuf_free(pkts[j]); + } +} + +struct rte_kni * +rte_kni_get(const char *name) +{ + uint32_t i; + struct rte_kni_memzone_slot *it; + struct rte_kni *kni; + + /* Note: could be improved perf-wise if necessary */ + for (i = 0; i < kni_memzone_pool.max_ifaces; i++) { + it = &kni_memzone_pool.slots[i]; + if (it->in_use == 0) + continue; + kni = it->m_ctx->addr; + if (strncmp(kni->name, name, RTE_KNI_NAMESIZE) == 0) + return kni; + } + + return NULL; +} + +const char * +rte_kni_get_name(const struct rte_kni *kni) +{ + return kni->name; +} + +static enum kni_ops_status +kni_check_request_register(struct rte_kni_ops *ops) +{ + /* check if KNI request ops has been registered*/ + if( NULL == ops ) + return KNI_REQ_NO_REGISTER; + + if((NULL == ops->change_mtu) && (NULL == ops->config_network_if)) + return KNI_REQ_NO_REGISTER; + + return KNI_REQ_REGISTERED; +} + +int +rte_kni_register_handlers(struct rte_kni *kni,struct rte_kni_ops *ops) +{ + enum kni_ops_status req_status; + + if (NULL == ops) { + RTE_LOG(ERR, KNI, "Invalid KNI request operation.\n"); + return -1; + } + + if (NULL == kni) { + RTE_LOG(ERR, KNI, "Invalid kni info.\n"); + return -1; + } + + req_status = kni_check_request_register(&kni->ops); + if ( KNI_REQ_REGISTERED == req_status) { + RTE_LOG(ERR, KNI, "The KNI request operation has already registered.\n"); + return -1; + } + + memcpy(&kni->ops, ops, sizeof(struct rte_kni_ops)); + return 0; +} + +int +rte_kni_unregister_handlers(struct rte_kni *kni) +{ + if (NULL == kni) { + RTE_LOG(ERR, KNI, "Invalid kni info.\n"); + return -1; + } + + kni->ops.change_mtu = NULL; + kni->ops.config_network_if = NULL; + return 0; +} +void +rte_kni_close(void) +{ + if (kni_fd < 0) + return; + + close(kni_fd); + kni_fd = -1; +} diff --git a/lib/librte_kni/rte_kni.h b/lib/librte_kni/rte_kni.h new file mode 100644 index 00000000..9899a170 --- /dev/null +++ b/lib/librte_kni/rte_kni.h @@ -0,0 +1,256 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_KNI_H_ +#define _RTE_KNI_H_ + +/** + * @file + * RTE KNI + * + * The KNI library provides the ability to create and destroy kernel NIC + * interfaces that may be used by the RTE application to receive/transmit + * packets from/to Linux kernel net interfaces. + * + * This library provide two APIs to burst receive packets from KNI interfaces, + * and burst transmit packets to KNI interfaces. + */ + +#include +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +struct rte_kni; +struct rte_mbuf; + +/** + * Structure which has the function pointers for KNI interface. + */ +struct rte_kni_ops { + uint8_t port_id; /* Port ID */ + + /* Pointer to function of changing MTU */ + int (*change_mtu)(uint8_t port_id, unsigned new_mtu); + + /* Pointer to function of configuring network interface */ + int (*config_network_if)(uint8_t port_id, uint8_t if_up); +}; + +/** + * Structure for configuring KNI device. + */ +struct rte_kni_conf { + /* + * KNI name which will be used in relevant network device. + * Let the name as short as possible, as it will be part of + * memzone name. + */ + char name[RTE_KNI_NAMESIZE]; + uint32_t core_id; /* Core ID to bind kernel thread on */ + uint16_t group_id; /* Group ID */ + unsigned mbuf_size; /* mbuf size */ + struct rte_pci_addr addr; + struct rte_pci_id id; + + uint8_t force_bind : 1; /* Flag to bind kernel thread */ +}; + +/** + * Initialize and preallocate KNI subsystem + * + * This function is to be executed on the MASTER lcore only, after EAL + * initialization and before any KNI interface is attempted to be + * allocated + * + * @param max_kni_ifaces + * The maximum number of KNI interfaces that can coexist concurrently + */ +void rte_kni_init(unsigned int max_kni_ifaces); + + +/** + * Allocate KNI interface according to the port id, mbuf size, mbuf pool, + * configurations and callbacks for kernel requests.The KNI interface created + * in the kernel space is the net interface the traditional Linux application + * talking to. + * + * The rte_kni_alloc shall not be called before rte_kni_init() has been + * called. rte_kni_alloc is thread safe. + * + * @param pktmbuf_pool + * The mempool for allocting mbufs for packets. + * @param conf + * The pointer to the configurations of the KNI device. + * @param ops + * The pointer to the callbacks for the KNI kernel requests. + * + * @return + * - The pointer to the context of a KNI interface. + * - NULL indicate error. + */ +struct rte_kni *rte_kni_alloc(struct rte_mempool *pktmbuf_pool, + const struct rte_kni_conf *conf, struct rte_kni_ops *ops); + +/** + * Release KNI interface according to the context. It will also release the + * paired KNI interface in kernel space. All processing on the specific KNI + * context need to be stopped before calling this interface. + * + * rte_kni_release is thread safe. + * + * @param kni + * The pointer to the context of an existent KNI interface. + * + * @return + * - 0 indicates success. + * - negative value indicates failure. + */ +int rte_kni_release(struct rte_kni *kni); + +/** + * It is used to handle the request mbufs sent from kernel space. + * Then analyzes it and calls the specific actions for the specific requests. + * Finally constructs the response mbuf and puts it back to the resp_q. + * + * @param kni + * The pointer to the context of an existent KNI interface. + * + * @return + * - 0 + * - negative value indicates failure. + */ +int rte_kni_handle_request(struct rte_kni *kni); + +/** + * Retrieve a burst of packets from a KNI interface. The retrieved packets are + * stored in rte_mbuf structures whose pointers are supplied in the array of + * mbufs, and the maximum number is indicated by num. It handles the freeing of + * the mbufs in the free queue of KNI interface. + * + * @param kni + * The KNI interface context. + * @param mbufs + * The array to store the pointers of mbufs. + * @param num + * The maximum number per burst. + * + * @return + * The actual number of packets retrieved. + */ +unsigned rte_kni_rx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, + unsigned num); + +/** + * Send a burst of packets to a KNI interface. The packets to be sent out are + * stored in rte_mbuf structures whose pointers are supplied in the array of + * mbufs, and the maximum number is indicated by num. It handles allocating + * the mbufs for KNI interface alloc queue. + * + * @param kni + * The KNI interface context. + * @param mbufs + * The array to store the pointers of mbufs. + * @param num + * The maximum number per burst. + * + * @return + * The actual number of packets sent. + */ +unsigned rte_kni_tx_burst(struct rte_kni *kni, struct rte_mbuf **mbufs, + unsigned num); + +/** + * Get the KNI context of its name. + * + * @param name + * pointer to the KNI device name. + * + * @return + * On success: Pointer to KNI interface. + * On failure: NULL. + */ +struct rte_kni *rte_kni_get(const char *name); + +/** + * Get the name given to a KNI device + * + * @param kni + * The KNI instance to query + * @return + * The pointer to the KNI name + */ +const char *rte_kni_get_name(const struct rte_kni *kni); + +/** + * Register KNI request handling for a specified port,and it can + * be called by master process or slave process. + * + * @param kni + * pointer to struct rte_kni. + * @param ops + * ponter to struct rte_kni_ops. + * + * @return + * On success: 0 + * On failure: -1 + */ +int rte_kni_register_handlers(struct rte_kni *kni, struct rte_kni_ops *ops); + +/** + * Unregister KNI request handling for a specified port. + * + * @param kni + * pointer to struct rte_kni. + * + * @return + * On success: 0 + * On failure: -1 + */ +int rte_kni_unregister_handlers(struct rte_kni *kni); + +/** + * Close KNI device. + */ +void rte_kni_close(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_KNI_H_ */ diff --git a/lib/librte_kni/rte_kni_fifo.h b/lib/librte_kni/rte_kni_fifo.h new file mode 100644 index 00000000..8cb85873 --- /dev/null +++ b/lib/librte_kni/rte_kni_fifo.h @@ -0,0 +1,93 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + + + +/** + * Initializes the kni fifo structure + */ +static void +kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +{ + /* Ensure size is power of 2 */ + if (size & (size - 1)) + rte_panic("KNI fifo size must be power of 2\n"); + + fifo->write = 0; + fifo->read = 0; + fifo->len = size; + fifo->elem_size = sizeof(void *); +} + +/** + * Adds num elements into the fifo. Return the number actually written + */ +static inline unsigned +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned fifo_write = fifo->write; + unsigned fifo_read = fifo->read; + unsigned new_write = fifo_write; + + for (i = 0; i < num; i++) { + new_write = (new_write + 1) & (fifo->len - 1); + + if (new_write == fifo_read) + break; + fifo->buffer[fifo_write] = data[i]; + fifo_write = new_write; + } + fifo->write = fifo_write; + return i; +} + +/** + * Get up to num elements from the fifo. Return the number actully read + */ +static inline unsigned +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned new_read = fifo->read; + unsigned fifo_write = fifo->write; + for (i = 0; i < num; i++) { + if (new_read == fifo_write) + break; + + data[i] = fifo->buffer[new_read]; + new_read = (new_read + 1) & (fifo->len - 1); + } + fifo->read = new_read; + return i; +} diff --git a/lib/librte_kni/rte_kni_version.map b/lib/librte_kni/rte_kni_version.map new file mode 100644 index 00000000..acd515eb --- /dev/null +++ b/lib/librte_kni/rte_kni_version.map @@ -0,0 +1,17 @@ +DPDK_2.0 { + global: + + rte_kni_alloc; + rte_kni_close; + rte_kni_get; + rte_kni_get_name; + rte_kni_handle_request; + rte_kni_init; + rte_kni_register_handlers; + rte_kni_release; + rte_kni_rx_burst; + rte_kni_tx_burst; + rte_kni_unregister_handlers; + + local: *; +}; diff --git a/lib/librte_kvargs/Makefile b/lib/librte_kvargs/Makefile new file mode 100644 index 00000000..87b09f20 --- /dev/null +++ b/lib/librte_kvargs/Makefile @@ -0,0 +1,55 @@ +# BSD LICENSE +# +# Copyright 2014 6WIND S.A. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# - Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# +# - Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# +# - Neither the name of 6WIND S.A. nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE +# COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, +# INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +# SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, +# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED +# OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_kvargs.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_kvargs_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_KVARGS) := rte_kvargs.c + +# install includes +INCS := rte_kvargs.h +SYMLINK-$(CONFIG_RTE_LIBRTE_KVARGS)-include := $(INCS) + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_KVARGS) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_kvargs/rte_kvargs.c b/lib/librte_kvargs/rte_kvargs.c new file mode 100644 index 00000000..8d56abd4 --- /dev/null +++ b/lib/librte_kvargs/rte_kvargs.c @@ -0,0 +1,210 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include + +#include "rte_kvargs.h" + +/* + * Receive a string with a list of arguments following the pattern + * key=value;key=value;... and insert them into the list. + * strtok() is used so the params string will be copied to be modified. + */ +static int +rte_kvargs_tokenize(struct rte_kvargs *kvlist, const char *params) +{ + unsigned i; + char *str; + char *ctx1 = NULL; + char *ctx2 = NULL; + + /* Copy the const char *params to a modifiable string + * to pass to rte_strsplit + */ + kvlist->str = strdup(params); + if (kvlist->str == NULL) { + RTE_LOG(ERR, PMD, "Cannot parse arguments: not enough memory\n"); + return -1; + } + + /* browse each key/value pair and add it in kvlist */ + str = kvlist->str; + while ((str = strtok_r(str, RTE_KVARGS_PAIRS_DELIM, &ctx1)) != NULL) { + + i = kvlist->count; + if (i >= RTE_KVARGS_MAX) { + RTE_LOG(ERR, PMD, "Cannot parse arguments: list full\n"); + return -1; + } + + kvlist->pairs[i].key = strtok_r(str, RTE_KVARGS_KV_DELIM, &ctx2); + kvlist->pairs[i].value = strtok_r(NULL, RTE_KVARGS_KV_DELIM, &ctx2); + if (kvlist->pairs[i].key == NULL || kvlist->pairs[i].value == NULL) { + RTE_LOG(ERR, PMD, + "Cannot parse arguments: wrong key or value\n" + "params=<%s>\n", params); + return -1; + } + + kvlist->count++; + str = NULL; + } + + return 0; +} + +/* + * Determine whether a key is valid or not by looking + * into a list of valid keys. + */ +static int +is_valid_key(const char *valid[], const char *key_match) +{ + const char **valid_ptr; + + for (valid_ptr = valid; *valid_ptr != NULL; valid_ptr++) { + if (strcmp(key_match, *valid_ptr) == 0) + return 1; + } + return 0; +} + +/* + * Determine whether all keys are valid or not by looking + * into a list of valid keys. + */ +static int +check_for_valid_keys(struct rte_kvargs *kvlist, + const char *valid[]) +{ + unsigned i, ret; + struct rte_kvargs_pair *pair; + + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + ret = is_valid_key(valid, pair->key); + if (!ret) { + RTE_LOG(ERR, PMD, + "Error parsing device, invalid key <%s>\n", + pair->key); + return -1; + } + } + return 0; +} + +/* + * Return the number of times a given arg_name exists in the key/value list. + * E.g. given a list = { rx = 0, rx = 1, tx = 2 } the number of args for + * arg "rx" will be 2. + */ +unsigned +rte_kvargs_count(const struct rte_kvargs *kvlist, const char *key_match) +{ + const struct rte_kvargs_pair *pair; + unsigned i, ret; + + ret = 0; + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + if (key_match == NULL || strcmp(pair->key, key_match) == 0) + ret++; + } + + return ret; +} + +/* + * For each matching key, call the given handler function. + */ +int +rte_kvargs_process(const struct rte_kvargs *kvlist, + const char *key_match, + arg_handler_t handler, + void *opaque_arg) +{ + const struct rte_kvargs_pair *pair; + unsigned i; + + for (i = 0; i < kvlist->count; i++) { + pair = &kvlist->pairs[i]; + if (key_match == NULL || strcmp(pair->key, key_match) == 0) { + if ((*handler)(pair->key, pair->value, opaque_arg) < 0) + return -1; + } + } + return 0; +} + +/* free the rte_kvargs structure */ +void +rte_kvargs_free(struct rte_kvargs *kvlist) +{ + if (!kvlist) + return; + + free(kvlist->str); + free(kvlist); +} + +/* + * Parse the arguments "key=value;key=value;..." string and return + * an allocated structure that contains a key/value list. Also + * check if only valid keys were used. + */ +struct rte_kvargs * +rte_kvargs_parse(const char *args, const char *valid_keys[]) +{ + struct rte_kvargs *kvlist; + + kvlist = malloc(sizeof(*kvlist)); + if (kvlist == NULL) + return NULL; + memset(kvlist, 0, sizeof(*kvlist)); + + if (rte_kvargs_tokenize(kvlist, args) < 0) { + rte_kvargs_free(kvlist); + return NULL; + } + + if (valid_keys != NULL && check_for_valid_keys(kvlist, valid_keys) < 0) { + rte_kvargs_free(kvlist); + return NULL; + } + + return kvlist; +} diff --git a/lib/librte_kvargs/rte_kvargs.h b/lib/librte_kvargs/rte_kvargs.h new file mode 100644 index 00000000..ae9ae79f --- /dev/null +++ b/lib/librte_kvargs/rte_kvargs.h @@ -0,0 +1,156 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2013 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_KVARGS_H_ +#define _RTE_KVARGS_H_ + +/** + * @file + * RTE Argument parsing + * + * This module can be used to parse arguments whose format is + * key1=value1,key2=value2,key3=value3,... + * + * The same key can appear several times with the same or a different + * value. Indeed, the arguments are stored as a list of key/values + * associations and not as a dictionary. + * + * This file provides some helpers that are especially used by virtual + * ethernet devices at initialization for arguments parsing. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Maximum number of key/value associations */ +#define RTE_KVARGS_MAX 32 + +/** separator character used between each pair */ +#define RTE_KVARGS_PAIRS_DELIM "," + +/** separator character used between key and value */ +#define RTE_KVARGS_KV_DELIM "=" + +/** Type of callback function used by rte_kvargs_process() */ +typedef int (*arg_handler_t)(const char *key, const char *value, void *opaque); + +/** A key/value association */ +struct rte_kvargs_pair { + char *key; /**< the name (key) of the association */ + char *value; /**< the value associated to that key */ +}; + +/** Store a list of key/value associations */ +struct rte_kvargs { + char *str; /**< copy of the argument string */ + unsigned count; /**< number of entries in the list */ + struct rte_kvargs_pair pairs[RTE_KVARGS_MAX]; /**< list of key/values */ +}; + +/** + * Allocate a rte_kvargs and store key/value associations from a string + * + * The function allocates and fills a rte_kvargs structure from a given + * string whose format is key1=value1,key2=value2,... + * + * The structure can be freed with rte_kvargs_free(). + * + * @param args + * The input string containing the key/value associations + * @param valid_keys + * A list of valid keys (table of const char *, the last must be NULL). + * This argument is ignored if NULL + * + * @return + * - A pointer to an allocated rte_kvargs structure on success + * - NULL on error + */ +struct rte_kvargs *rte_kvargs_parse(const char *args, const char *valid_keys[]); + +/** + * Free a rte_kvargs structure + * + * Free a rte_kvargs structure previously allocated with + * rte_kvargs_parse(). + * + * @param kvlist + * The rte_kvargs structure + */ +void rte_kvargs_free(struct rte_kvargs *kvlist); + +/** + * Call a handler function for each key/value matching the key + * + * For each key/value association that matches the given key, calls the + * handler function with the for a given arg_name passing the value on the + * dictionary for that key and a given extra argument. If *kvlist* is NULL + * function does nothing. + * + * @param kvlist + * The rte_kvargs structure + * @param key_match + * The key on which the handler should be called, or NULL to process handler + * on all associations + * @param handler + * The function to call for each matching key + * @param opaque_arg + * A pointer passed unchanged to the handler + * + * @return + * - 0 on success + * - Negative on error + */ +int rte_kvargs_process(const struct rte_kvargs *kvlist, + const char *key_match, arg_handler_t handler, void *opaque_arg); + +/** + * Count the number of associations matching the given key + * + * @param kvlist + * The rte_kvargs structure + * @param key_match + * The key that should match, or NULL to count all associations + + * @return + * The number of entries + */ +unsigned rte_kvargs_count(const struct rte_kvargs *kvlist, + const char *key_match); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_kvargs/rte_kvargs_version.map b/lib/librte_kvargs/rte_kvargs_version.map new file mode 100644 index 00000000..2030ec46 --- /dev/null +++ b/lib/librte_kvargs/rte_kvargs_version.map @@ -0,0 +1,10 @@ +DPDK_2.0 { + global: + + rte_kvargs_count; + rte_kvargs_free; + rte_kvargs_parse; + rte_kvargs_process; + + local: *; +}; diff --git a/lib/librte_lpm/Makefile b/lib/librte_lpm/Makefile new file mode 100644 index 00000000..656ade27 --- /dev/null +++ b/lib/librte_lpm/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_lpm.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_lpm_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_LPM) := rte_lpm.c rte_lpm6.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include := rte_lpm.h rte_lpm6.h + +ifneq ($(filter y,$(CONFIG_RTE_ARCH_ARM) $(CONFIG_RTE_ARCH_ARM64)),) +SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_neon.h +else ifeq ($(CONFIG_RTE_ARCH_X86),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_LPM)-include += rte_lpm_sse.h +endif + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_LPM) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_lpm/rte_lpm.c b/lib/librte_lpm/rte_lpm.c new file mode 100644 index 00000000..8bdf6065 --- /dev/null +++ b/lib/librte_lpm/rte_lpm.c @@ -0,0 +1,1919 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include /* for definition of RTE_CACHE_LINE_SIZE */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_lpm.h" + +TAILQ_HEAD(rte_lpm_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_lpm_tailq = { + .name = "RTE_LPM", +}; +EAL_REGISTER_TAILQ(rte_lpm_tailq) + +#define MAX_DEPTH_TBL24 24 + +enum valid_flag { + INVALID = 0, + VALID +}; + +/* Macro to enable/disable run-time checks. */ +#if defined(RTE_LIBRTE_LPM_DEBUG) +#include +#define VERIFY_DEPTH(depth) do { \ + if ((depth == 0) || (depth > RTE_LPM_MAX_DEPTH)) \ + rte_panic("LPM: Invalid depth (%u) at line %d", \ + (unsigned)(depth), __LINE__); \ +} while (0) +#else +#define VERIFY_DEPTH(depth) +#endif + +/* + * Converts a given depth value to its corresponding mask value. + * + * depth (IN) : range = 1 - 32 + * mask (OUT) : 32bit mask + */ +static uint32_t __attribute__((pure)) +depth_to_mask(uint8_t depth) +{ + VERIFY_DEPTH(depth); + + /* To calculate a mask start with a 1 on the left hand side and right + * shift while populating the left hand side with 1's + */ + return (int)0x80000000 >> (depth - 1); +} + +/* + * Converts given depth value to its corresponding range value. + */ +static inline uint32_t __attribute__((pure)) +depth_to_range(uint8_t depth) +{ + VERIFY_DEPTH(depth); + + /* + * Calculate tbl24 range. (Note: 2^depth = 1 << depth) + */ + if (depth <= MAX_DEPTH_TBL24) + return 1 << (MAX_DEPTH_TBL24 - depth); + + /* Else if depth is greater than 24 */ + return 1 << (RTE_LPM_MAX_DEPTH - depth); +} + +/* + * Find an existing lpm table and return a pointer to it. + */ +struct rte_lpm_v20 * +rte_lpm_find_existing_v20(const char *name) +{ + struct rte_lpm_v20 *l = NULL; + struct rte_tailq_entry *te; + struct rte_lpm_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, lpm_list, next) { + l = (struct rte_lpm_v20 *) te->data; + if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return l; +} +VERSION_SYMBOL(rte_lpm_find_existing, _v20, 2.0); + +struct rte_lpm * +rte_lpm_find_existing_v1604(const char *name) +{ + struct rte_lpm *l = NULL; + struct rte_tailq_entry *te; + struct rte_lpm_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, lpm_list, next) { + l = (struct rte_lpm *) te->data; + if (strncmp(name, l->name, RTE_LPM_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return l; +} +BIND_DEFAULT_SYMBOL(rte_lpm_find_existing, _v1604, 16.04); +MAP_STATIC_SYMBOL(struct rte_lpm *rte_lpm_find_existing(const char *name), + rte_lpm_find_existing_v1604); + +/* + * Allocates memory for LPM object + */ +struct rte_lpm_v20 * +rte_lpm_create_v20(const char *name, int socket_id, int max_rules, + __rte_unused int flags) +{ + char mem_name[RTE_LPM_NAMESIZE]; + struct rte_lpm_v20 *lpm = NULL; + struct rte_tailq_entry *te; + uint32_t mem_size; + struct rte_lpm_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry_v20) != 2); + + /* Check user arguments. */ + if ((name == NULL) || (socket_id < -1) || (max_rules == 0)) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); + + /* Determine the amount of memory to allocate. */ + mem_size = sizeof(*lpm) + (sizeof(lpm->rules_tbl[0]) * max_rules); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, lpm_list, next) { + lpm = (struct rte_lpm_v20 *) te->data; + if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0) + break; + } + lpm = NULL; + if (te != NULL) { + rte_errno = EEXIST; + goto exit; + } + + /* allocate tailq entry */ + te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n"); + goto exit; + } + + /* Allocate memory to store the LPM data structures. */ + lpm = (struct rte_lpm_v20 *)rte_zmalloc_socket(mem_name, mem_size, + RTE_CACHE_LINE_SIZE, socket_id); + if (lpm == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(te); + goto exit; + } + + /* Save user arguments. */ + lpm->max_rules = max_rules; + snprintf(lpm->name, sizeof(lpm->name), "%s", name); + + te->data = (void *) lpm; + + TAILQ_INSERT_TAIL(lpm_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return lpm; +} +VERSION_SYMBOL(rte_lpm_create, _v20, 2.0); + +struct rte_lpm * +rte_lpm_create_v1604(const char *name, int socket_id, + const struct rte_lpm_config *config) +{ + char mem_name[RTE_LPM_NAMESIZE]; + struct rte_lpm *lpm = NULL; + struct rte_tailq_entry *te; + uint32_t mem_size, rules_size, tbl8s_size; + struct rte_lpm_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm_tbl_entry) != 4); + + /* Check user arguments. */ + if ((name == NULL) || (socket_id < -1) || (config->max_rules == 0) + || config->number_tbl8s > RTE_LPM_MAX_TBL8_NUM_GROUPS) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); + + /* Determine the amount of memory to allocate. */ + mem_size = sizeof(*lpm); + rules_size = sizeof(struct rte_lpm_rule) * config->max_rules; + tbl8s_size = (sizeof(struct rte_lpm_tbl_entry) * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, lpm_list, next) { + lpm = (struct rte_lpm *) te->data; + if (strncmp(name, lpm->name, RTE_LPM_NAMESIZE) == 0) + break; + } + lpm = NULL; + if (te != NULL) { + rte_errno = EEXIST; + goto exit; + } + + /* allocate tailq entry */ + te = rte_zmalloc("LPM_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, LPM, "Failed to allocate tailq entry\n"); + goto exit; + } + + /* Allocate memory to store the LPM data structures. */ + lpm = (struct rte_lpm *)rte_zmalloc_socket(mem_name, mem_size, + RTE_CACHE_LINE_SIZE, socket_id); + if (lpm == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(te); + goto exit; + } + + lpm->rules_tbl = (struct rte_lpm_rule *)rte_zmalloc_socket(NULL, + (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm->rules_tbl == NULL) { + RTE_LOG(ERR, LPM, "LPM rules_tbl memory allocation failed\n"); + rte_free(lpm); + lpm = NULL; + rte_free(te); + goto exit; + } + + lpm->tbl8 = (struct rte_lpm_tbl_entry *)rte_zmalloc_socket(NULL, + (size_t)tbl8s_size, RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm->tbl8 == NULL) { + RTE_LOG(ERR, LPM, "LPM tbl8 memory allocation failed\n"); + rte_free(lpm); + lpm = NULL; + rte_free(te); + goto exit; + } + + /* Save user arguments. */ + lpm->max_rules = config->max_rules; + lpm->number_tbl8s = config->number_tbl8s; + snprintf(lpm->name, sizeof(lpm->name), "%s", name); + + te->data = (void *) lpm; + + TAILQ_INSERT_TAIL(lpm_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return lpm; +} +BIND_DEFAULT_SYMBOL(rte_lpm_create, _v1604, 16.04); +MAP_STATIC_SYMBOL( + struct rte_lpm *rte_lpm_create(const char *name, int socket_id, + const struct rte_lpm_config *config), rte_lpm_create_v1604); + +/* + * Deallocates memory for given LPM table. + */ +void +rte_lpm_free_v20(struct rte_lpm_v20 *lpm) +{ + struct rte_lpm_list *lpm_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (lpm == NULL) + return; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, lpm_list, next) { + if (te->data == (void *) lpm) + break; + } + if (te != NULL) + TAILQ_REMOVE(lpm_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(lpm->rules_tbl); + rte_free(lpm); + rte_free(te); +} +VERSION_SYMBOL(rte_lpm_free, _v20, 2.0); + +void +rte_lpm_free_v1604(struct rte_lpm *lpm) +{ + struct rte_lpm_list *lpm_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (lpm == NULL) + return; + + lpm_list = RTE_TAILQ_CAST(rte_lpm_tailq.head, rte_lpm_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, lpm_list, next) { + if (te->data == (void *) lpm) + break; + } + if (te != NULL) + TAILQ_REMOVE(lpm_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(lpm->rules_tbl); + rte_free(lpm); + rte_free(te); +} +BIND_DEFAULT_SYMBOL(rte_lpm_free, _v1604, 16.04); +MAP_STATIC_SYMBOL(void rte_lpm_free(struct rte_lpm *lpm), + rte_lpm_free_v1604); + +/* + * Adds a rule to the rule table. + * + * NOTE: The rule table is split into 32 groups. Each group contains rules that + * apply to a specific prefix depth (i.e. group 1 contains rules that apply to + * prefixes with a depth of 1 etc.). In the following code (depth - 1) is used + * to refer to depth 1 because even though the depth range is 1 - 32, depths + * are stored in the rule table from 0 - 31. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline int32_t +rule_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth, + uint8_t next_hop) +{ + uint32_t rule_gindex, rule_index, last_rule; + int i; + + VERIFY_DEPTH(depth); + + /* Scan through rule group to see if rule already exists. */ + if (lpm->rule_info[depth - 1].used_rules > 0) { + + /* rule_gindex stands for rule group index. */ + rule_gindex = lpm->rule_info[depth - 1].first_rule; + /* Initialise rule_index to point to start of rule group. */ + rule_index = rule_gindex; + /* Last rule = Last used rule in this rule group. */ + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + for (; rule_index < last_rule; rule_index++) { + + /* If rule already exists update its next_hop and return. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) { + lpm->rules_tbl[rule_index].next_hop = next_hop; + + return rule_index; + } + } + + if (rule_index == lpm->max_rules) + return -ENOSPC; + } else { + /* Calculate the position in which the rule will be stored. */ + rule_index = 0; + + for (i = depth - 1; i > 0; i--) { + if (lpm->rule_info[i - 1].used_rules > 0) { + rule_index = lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules; + break; + } + } + if (rule_index == lpm->max_rules) + return -ENOSPC; + + lpm->rule_info[depth - 1].first_rule = rule_index; + } + + /* Make room for the new rule in the array. */ + for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) { + if (lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules == lpm->max_rules) + return -ENOSPC; + + if (lpm->rule_info[i - 1].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules] + = lpm->rules_tbl[lpm->rule_info[i - 1].first_rule]; + lpm->rule_info[i - 1].first_rule++; + } + } + + /* Add the new rule. */ + lpm->rules_tbl[rule_index].ip = ip_masked; + lpm->rules_tbl[rule_index].next_hop = next_hop; + + /* Increment the used rules counter for this rule group. */ + lpm->rule_info[depth - 1].used_rules++; + + return rule_index; +} + +static inline int32_t +rule_add_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth, + uint32_t next_hop) +{ + uint32_t rule_gindex, rule_index, last_rule; + int i; + + VERIFY_DEPTH(depth); + + /* Scan through rule group to see if rule already exists. */ + if (lpm->rule_info[depth - 1].used_rules > 0) { + + /* rule_gindex stands for rule group index. */ + rule_gindex = lpm->rule_info[depth - 1].first_rule; + /* Initialise rule_index to point to start of rule group. */ + rule_index = rule_gindex; + /* Last rule = Last used rule in this rule group. */ + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + for (; rule_index < last_rule; rule_index++) { + + /* If rule already exists update its next_hop and return. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) { + lpm->rules_tbl[rule_index].next_hop = next_hop; + + return rule_index; + } + } + + if (rule_index == lpm->max_rules) + return -ENOSPC; + } else { + /* Calculate the position in which the rule will be stored. */ + rule_index = 0; + + for (i = depth - 1; i > 0; i--) { + if (lpm->rule_info[i - 1].used_rules > 0) { + rule_index = lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules; + break; + } + } + if (rule_index == lpm->max_rules) + return -ENOSPC; + + lpm->rule_info[depth - 1].first_rule = rule_index; + } + + /* Make room for the new rule in the array. */ + for (i = RTE_LPM_MAX_DEPTH; i > depth; i--) { + if (lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules == lpm->max_rules) + return -ENOSPC; + + if (lpm->rule_info[i - 1].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i - 1].first_rule + + lpm->rule_info[i - 1].used_rules] + = lpm->rules_tbl[lpm->rule_info[i - 1].first_rule]; + lpm->rule_info[i - 1].first_rule++; + } + } + + /* Add the new rule. */ + lpm->rules_tbl[rule_index].ip = ip_masked; + lpm->rules_tbl[rule_index].next_hop = next_hop; + + /* Increment the used rules counter for this rule group. */ + lpm->rule_info[depth - 1].used_rules++; + + return rule_index; +} + +/* + * Delete a rule from the rule table. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline void +rule_delete_v20(struct rte_lpm_v20 *lpm, int32_t rule_index, uint8_t depth) +{ + int i; + + VERIFY_DEPTH(depth); + + lpm->rules_tbl[rule_index] = + lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule + + lpm->rule_info[depth - 1].used_rules - 1]; + + for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) { + if (lpm->rule_info[i].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] = + lpm->rules_tbl[lpm->rule_info[i].first_rule + + lpm->rule_info[i].used_rules - 1]; + lpm->rule_info[i].first_rule--; + } + } + + lpm->rule_info[depth - 1].used_rules--; +} + +static inline void +rule_delete_v1604(struct rte_lpm *lpm, int32_t rule_index, uint8_t depth) +{ + int i; + + VERIFY_DEPTH(depth); + + lpm->rules_tbl[rule_index] = + lpm->rules_tbl[lpm->rule_info[depth - 1].first_rule + + lpm->rule_info[depth - 1].used_rules - 1]; + + for (i = depth; i < RTE_LPM_MAX_DEPTH; i++) { + if (lpm->rule_info[i].used_rules > 0) { + lpm->rules_tbl[lpm->rule_info[i].first_rule - 1] = + lpm->rules_tbl[lpm->rule_info[i].first_rule + + lpm->rule_info[i].used_rules - 1]; + lpm->rule_info[i].first_rule--; + } + } + + lpm->rule_info[depth - 1].used_rules--; +} + +/* + * Finds a rule in rule table. + * NOTE: Valid range for depth parameter is 1 .. 32 inclusive. + */ +static inline int32_t +rule_find_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth) +{ + uint32_t rule_gindex, last_rule, rule_index; + + VERIFY_DEPTH(depth); + + rule_gindex = lpm->rule_info[depth - 1].first_rule; + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + /* Scan used rules at given depth to find rule. */ + for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) { + /* If rule is found return the rule index. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) + return rule_index; + } + + /* If rule is not found return -EINVAL. */ + return -EINVAL; +} + +static inline int32_t +rule_find_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth) +{ + uint32_t rule_gindex, last_rule, rule_index; + + VERIFY_DEPTH(depth); + + rule_gindex = lpm->rule_info[depth - 1].first_rule; + last_rule = rule_gindex + lpm->rule_info[depth - 1].used_rules; + + /* Scan used rules at given depth to find rule. */ + for (rule_index = rule_gindex; rule_index < last_rule; rule_index++) { + /* If rule is found return the rule index. */ + if (lpm->rules_tbl[rule_index].ip == ip_masked) + return rule_index; + } + + /* If rule is not found return -EINVAL. */ + return -EINVAL; +} + +/* + * Find, clean and allocate a tbl8. + */ +static inline int32_t +tbl8_alloc_v20(struct rte_lpm_tbl_entry_v20 *tbl8) +{ + uint32_t group_idx; /* tbl8 group index. */ + struct rte_lpm_tbl_entry_v20 *tbl8_entry; + + /* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */ + for (group_idx = 0; group_idx < RTE_LPM_TBL8_NUM_GROUPS; + group_idx++) { + tbl8_entry = &tbl8[group_idx * RTE_LPM_TBL8_GROUP_NUM_ENTRIES]; + /* If a free tbl8 group is found clean it and set as VALID. */ + if (!tbl8_entry->valid_group) { + memset(&tbl8_entry[0], 0, + RTE_LPM_TBL8_GROUP_NUM_ENTRIES * + sizeof(tbl8_entry[0])); + + tbl8_entry->valid_group = VALID; + + /* Return group index for allocated tbl8 group. */ + return group_idx; + } + } + + /* If there are no tbl8 groups free then return error. */ + return -ENOSPC; +} + +static inline int32_t +tbl8_alloc_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t number_tbl8s) +{ + uint32_t group_idx; /* tbl8 group index. */ + struct rte_lpm_tbl_entry *tbl8_entry; + + /* Scan through tbl8 to find a free (i.e. INVALID) tbl8 group. */ + for (group_idx = 0; group_idx < number_tbl8s; group_idx++) { + tbl8_entry = &tbl8[group_idx * RTE_LPM_TBL8_GROUP_NUM_ENTRIES]; + /* If a free tbl8 group is found clean it and set as VALID. */ + if (!tbl8_entry->valid_group) { + memset(&tbl8_entry[0], 0, + RTE_LPM_TBL8_GROUP_NUM_ENTRIES * + sizeof(tbl8_entry[0])); + + tbl8_entry->valid_group = VALID; + + /* Return group index for allocated tbl8 group. */ + return group_idx; + } + } + + /* If there are no tbl8 groups free then return error. */ + return -ENOSPC; +} + +static inline void +tbl8_free_v20(struct rte_lpm_tbl_entry_v20 *tbl8, uint32_t tbl8_group_start) +{ + /* Set tbl8 group invalid*/ + tbl8[tbl8_group_start].valid_group = INVALID; +} + +static inline void +tbl8_free_v1604(struct rte_lpm_tbl_entry *tbl8, uint32_t tbl8_group_start) +{ + /* Set tbl8 group invalid*/ + tbl8[tbl8_group_start].valid_group = INVALID; +} + +static inline int32_t +add_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j; + + /* Calculate the index into Table24. */ + tbl24_index = ip >> 8; + tbl24_range = depth_to_range(depth); + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + /* + * For invalid OR valid and non-extended tbl 24 entries set + * entry. + */ + if (!lpm->tbl24[i].valid || (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth)) { + + struct rte_lpm_tbl_entry_v20 new_tbl24_entry = { + .valid = VALID, + .valid_group = 0, + .depth = depth, + }; + new_tbl24_entry.next_hop = next_hop; + + /* Setting tbl24 entry in one go to avoid race + * conditions + */ + lpm->tbl24[i] = new_tbl24_entry; + + continue; + } + + if (lpm->tbl24[i].valid_group == 1) { + /* If tbl24 entry is valid and extended calculate the + * index into tbl8. + */ + tbl8_index = lpm->tbl24[i].group_idx * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < tbl8_group_end; j++) { + if (!lpm->tbl8[j].valid || + lpm->tbl8[j].depth <= depth) { + struct rte_lpm_tbl_entry_v20 + new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = depth, + }; + new_tbl8_entry.next_hop = next_hop; + + /* + * Setting tbl8 entry in one go to avoid + * race conditions + */ + lpm->tbl8[j] = new_tbl8_entry; + + continue; + } + } + } + } + + return 0; +} + +static inline int32_t +add_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint32_t next_hop) +{ +#define group_idx next_hop + uint32_t tbl24_index, tbl24_range, tbl8_index, tbl8_group_end, i, j; + + /* Calculate the index into Table24. */ + tbl24_index = ip >> 8; + tbl24_range = depth_to_range(depth); + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + /* + * For invalid OR valid and non-extended tbl 24 entries set + * entry. + */ + if (!lpm->tbl24[i].valid || (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth)) { + + struct rte_lpm_tbl_entry new_tbl24_entry = { + .next_hop = next_hop, + .valid = VALID, + .valid_group = 0, + .depth = depth, + }; + + /* Setting tbl24 entry in one go to avoid race + * conditions + */ + lpm->tbl24[i] = new_tbl24_entry; + + continue; + } + + if (lpm->tbl24[i].valid_group == 1) { + /* If tbl24 entry is valid and extended calculate the + * index into tbl8. + */ + tbl8_index = lpm->tbl24[i].group_idx * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < tbl8_group_end; j++) { + if (!lpm->tbl8[j].valid || + lpm->tbl8[j].depth <= depth) { + struct rte_lpm_tbl_entry + new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = depth, + .next_hop = next_hop, + }; + + /* + * Setting tbl8 entry in one go to avoid + * race conditions + */ + lpm->tbl8[j] = new_tbl8_entry; + + continue; + } + } + } + } +#undef group_idx + return 0; +} + +static inline int32_t +add_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl24_index; + int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index, + tbl8_range, i; + + tbl24_index = (ip_masked >> 8); + tbl8_range = depth_to_range(depth); + + if (!lpm->tbl24[tbl24_index].valid) { + /* Search for a free tbl8 group. */ + tbl8_group_index = tbl8_alloc_v20(lpm->tbl8); + + /* Check tbl8 allocation was successful. */ + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + /* Find index into tbl8 and range. */ + tbl8_index = (tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES) + + (ip_masked & 0xFF); + + /* Set tbl8 entry. */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + lpm->tbl8[i].valid = VALID; + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go + */ + + struct rte_lpm_tbl_entry_v20 new_tbl24_entry = { + { .group_idx = (uint8_t)tbl8_group_index, }, + .valid = VALID, + .valid_group = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + } /* If valid entry but not extended calculate the index into Table8. */ + else if (lpm->tbl24[tbl24_index].valid_group == 0) { + /* Search for free tbl8 group. */ + tbl8_group_index = tbl8_alloc_v20(lpm->tbl8); + + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_group_start + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* Populate new tbl8 with tbl24 value. */ + for (i = tbl8_group_start; i < tbl8_group_end; i++) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = lpm->tbl24[tbl24_index].depth; + lpm->tbl8[i].next_hop = + lpm->tbl24[tbl24_index].next_hop; + } + + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + /* Insert new rule into the tbl8 entry. */ + for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) { + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + + continue; + } + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go. + */ + + struct rte_lpm_tbl_entry_v20 new_tbl24_entry = { + { .group_idx = (uint8_t)tbl8_group_index, }, + .valid = VALID, + .valid_group = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + } else { /* + * If it is valid, extended entry calculate the index into tbl8. + */ + tbl8_group_index = lpm->tbl24[tbl24_index].group_idx; + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + struct rte_lpm_tbl_entry_v20 new_tbl8_entry = { + .valid = VALID, + .depth = depth, + .valid_group = lpm->tbl8[i].valid_group, + }; + new_tbl8_entry.next_hop = next_hop; + /* + * Setting tbl8 entry in one go to avoid race + * condition + */ + lpm->tbl8[i] = new_tbl8_entry; + + continue; + } + } + } + + return 0; +} + +static inline int32_t +add_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, uint8_t depth, + uint32_t next_hop) +{ +#define group_idx next_hop + uint32_t tbl24_index; + int32_t tbl8_group_index, tbl8_group_start, tbl8_group_end, tbl8_index, + tbl8_range, i; + + tbl24_index = (ip_masked >> 8); + tbl8_range = depth_to_range(depth); + + if (!lpm->tbl24[tbl24_index].valid) { + /* Search for a free tbl8 group. */ + tbl8_group_index = tbl8_alloc_v1604(lpm->tbl8, lpm->number_tbl8s); + + /* Check tbl8 allocation was successful. */ + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + /* Find index into tbl8 and range. */ + tbl8_index = (tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES) + + (ip_masked & 0xFF); + + /* Set tbl8 entry. */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + lpm->tbl8[i].valid = VALID; + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go + */ + + struct rte_lpm_tbl_entry new_tbl24_entry = { + .group_idx = (uint8_t)tbl8_group_index, + .valid = VALID, + .valid_group = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + } /* If valid entry but not extended calculate the index into Table8. */ + else if (lpm->tbl24[tbl24_index].valid_group == 0) { + /* Search for free tbl8 group. */ + tbl8_group_index = tbl8_alloc_v1604(lpm->tbl8, lpm->number_tbl8s); + + if (tbl8_group_index < 0) { + return tbl8_group_index; + } + + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_group_start + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* Populate new tbl8 with tbl24 value. */ + for (i = tbl8_group_start; i < tbl8_group_end; i++) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = lpm->tbl24[tbl24_index].depth; + lpm->tbl8[i].next_hop = + lpm->tbl24[tbl24_index].next_hop; + } + + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + /* Insert new rule into the tbl8 entry. */ + for (i = tbl8_index; i < tbl8_index + tbl8_range; i++) { + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = depth; + lpm->tbl8[i].next_hop = next_hop; + + continue; + } + } + + /* + * Update tbl24 entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go. + */ + + struct rte_lpm_tbl_entry new_tbl24_entry = { + .group_idx = (uint8_t)tbl8_group_index, + .valid = VALID, + .valid_group = 1, + .depth = 0, + }; + + lpm->tbl24[tbl24_index] = new_tbl24_entry; + + } else { /* + * If it is valid, extended entry calculate the index into tbl8. + */ + tbl8_group_index = lpm->tbl24[tbl24_index].group_idx; + tbl8_group_start = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + + if (!lpm->tbl8[i].valid || + lpm->tbl8[i].depth <= depth) { + struct rte_lpm_tbl_entry new_tbl8_entry = { + .valid = VALID, + .depth = depth, + .next_hop = next_hop, + .valid_group = lpm->tbl8[i].valid_group, + }; + + /* + * Setting tbl8 entry in one go to avoid race + * condition + */ + lpm->tbl8[i] = new_tbl8_entry; + + continue; + } + } + } +#undef group_idx + return 0; +} + +/* + * Add a route + */ +int +rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, + uint8_t next_hop) +{ + int32_t rule_index, status = 0; + uint32_t ip_masked; + + /* Check user arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + ip_masked = ip & depth_to_mask(depth); + + /* Add the rule to the rule table. */ + rule_index = rule_add_v20(lpm, ip_masked, depth, next_hop); + + /* If the is no space available for new rule return error. */ + if (rule_index < 0) { + return rule_index; + } + + if (depth <= MAX_DEPTH_TBL24) { + status = add_depth_small_v20(lpm, ip_masked, depth, next_hop); + } else { /* If depth > RTE_LPM_MAX_DEPTH_TBL24 */ + status = add_depth_big_v20(lpm, ip_masked, depth, next_hop); + + /* + * If add fails due to exhaustion of tbl8 extensions delete + * rule that was added to rule table. + */ + if (status < 0) { + rule_delete_v20(lpm, rule_index, depth); + + return status; + } + } + + return 0; +} +VERSION_SYMBOL(rte_lpm_add, _v20, 2.0); + +int +rte_lpm_add_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint32_t next_hop) +{ + int32_t rule_index, status = 0; + uint32_t ip_masked; + + /* Check user arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + ip_masked = ip & depth_to_mask(depth); + + /* Add the rule to the rule table. */ + rule_index = rule_add_v1604(lpm, ip_masked, depth, next_hop); + + /* If the is no space available for new rule return error. */ + if (rule_index < 0) { + return rule_index; + } + + if (depth <= MAX_DEPTH_TBL24) { + status = add_depth_small_v1604(lpm, ip_masked, depth, next_hop); + } else { /* If depth > RTE_LPM_MAX_DEPTH_TBL24 */ + status = add_depth_big_v1604(lpm, ip_masked, depth, next_hop); + + /* + * If add fails due to exhaustion of tbl8 extensions delete + * rule that was added to rule table. + */ + if (status < 0) { + rule_delete_v1604(lpm, rule_index, depth); + + return status; + } + } + + return 0; +} +BIND_DEFAULT_SYMBOL(rte_lpm_add, _v1604, 16.04); +MAP_STATIC_SYMBOL(int rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, + uint8_t depth, uint32_t next_hop), rte_lpm_add_v1604); + +/* + * Look for a rule in the high-level rules table + */ +int +rte_lpm_is_rule_present_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, +uint8_t *next_hop) +{ + uint32_t ip_masked; + int32_t rule_index; + + /* Check user arguments. */ + if ((lpm == NULL) || + (next_hop == NULL) || + (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + /* Look for the rule using rule_find. */ + ip_masked = ip & depth_to_mask(depth); + rule_index = rule_find_v20(lpm, ip_masked, depth); + + if (rule_index >= 0) { + *next_hop = lpm->rules_tbl[rule_index].next_hop; + return 1; + } + + /* If rule is not found return 0. */ + return 0; +} +VERSION_SYMBOL(rte_lpm_is_rule_present, _v20, 2.0); + +int +rte_lpm_is_rule_present_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, +uint32_t *next_hop) +{ + uint32_t ip_masked; + int32_t rule_index; + + /* Check user arguments. */ + if ((lpm == NULL) || + (next_hop == NULL) || + (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) + return -EINVAL; + + /* Look for the rule using rule_find. */ + ip_masked = ip & depth_to_mask(depth); + rule_index = rule_find_v1604(lpm, ip_masked, depth); + + if (rule_index >= 0) { + *next_hop = lpm->rules_tbl[rule_index].next_hop; + return 1; + } + + /* If rule is not found return 0. */ + return 0; +} +BIND_DEFAULT_SYMBOL(rte_lpm_is_rule_present, _v1604, 16.04); +MAP_STATIC_SYMBOL(int rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, + uint8_t depth, uint32_t *next_hop), rte_lpm_is_rule_present_v1604); + +static inline int32_t +find_previous_rule_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, + uint8_t *sub_rule_depth) +{ + int32_t rule_index; + uint32_t ip_masked; + uint8_t prev_depth; + + for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) { + ip_masked = ip & depth_to_mask(prev_depth); + + rule_index = rule_find_v20(lpm, ip_masked, prev_depth); + + if (rule_index >= 0) { + *sub_rule_depth = prev_depth; + return rule_index; + } + } + + return -1; +} + +static inline int32_t +find_previous_rule_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint8_t *sub_rule_depth) +{ + int32_t rule_index; + uint32_t ip_masked; + uint8_t prev_depth; + + for (prev_depth = (uint8_t)(depth - 1); prev_depth > 0; prev_depth--) { + ip_masked = ip & depth_to_mask(prev_depth); + + rule_index = rule_find_v1604(lpm, ip_masked, prev_depth); + + if (rule_index >= 0) { + *sub_rule_depth = prev_depth; + return rule_index; + } + } + + return -1; +} + +static inline int32_t +delete_depth_small_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ + uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j; + + /* Calculate the range and index into Table24. */ + tbl24_range = depth_to_range(depth); + tbl24_index = (ip_masked >> 8); + + /* + * Firstly check the sub_rule_index. A -1 indicates no replacement rule + * and a positive number indicates a sub_rule_index. + */ + if (sub_rule_index < 0) { + /* + * If no replacement rule exists then invalidate entries + * associated with this rule. + */ + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth) { + lpm->tbl24[i].valid = INVALID; + } else if (lpm->tbl24[i].valid_group == 1) { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].group_idx; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j].valid = INVALID; + } + } + } + } else { + /* + * If a replacement rule exists then modify entries + * associated with this rule. + */ + + struct rte_lpm_tbl_entry_v20 new_tbl24_entry = { + {.next_hop = lpm->rules_tbl[sub_rule_index].next_hop,}, + .valid = VALID, + .valid_group = 0, + .depth = sub_rule_depth, + }; + + struct rte_lpm_tbl_entry_v20 new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = sub_rule_depth, + }; + new_tbl8_entry.next_hop = + lpm->rules_tbl[sub_rule_index].next_hop; + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth) { + lpm->tbl24[i] = new_tbl24_entry; + } else if (lpm->tbl24[i].valid_group == 1) { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].group_idx; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j] = new_tbl8_entry; + } + } + } + } + + return 0; +} + +static inline int32_t +delete_depth_small_v1604(struct rte_lpm *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ +#define group_idx next_hop + uint32_t tbl24_range, tbl24_index, tbl8_group_index, tbl8_index, i, j; + + /* Calculate the range and index into Table24. */ + tbl24_range = depth_to_range(depth); + tbl24_index = (ip_masked >> 8); + + /* + * Firstly check the sub_rule_index. A -1 indicates no replacement rule + * and a positive number indicates a sub_rule_index. + */ + if (sub_rule_index < 0) { + /* + * If no replacement rule exists then invalidate entries + * associated with this rule. + */ + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth) { + lpm->tbl24[i].valid = INVALID; + } else if (lpm->tbl24[i].valid_group == 1) { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].group_idx; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j].valid = INVALID; + } + } + } + } else { + /* + * If a replacement rule exists then modify entries + * associated with this rule. + */ + + struct rte_lpm_tbl_entry new_tbl24_entry = { + .next_hop = lpm->rules_tbl[sub_rule_index].next_hop, + .valid = VALID, + .valid_group = 0, + .depth = sub_rule_depth, + }; + + struct rte_lpm_tbl_entry new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = sub_rule_depth, + .next_hop = lpm->rules_tbl + [sub_rule_index].next_hop, + }; + + for (i = tbl24_index; i < (tbl24_index + tbl24_range); i++) { + + if (lpm->tbl24[i].valid_group == 0 && + lpm->tbl24[i].depth <= depth) { + lpm->tbl24[i] = new_tbl24_entry; + } else if (lpm->tbl24[i].valid_group == 1) { + /* + * If TBL24 entry is extended, then there has + * to be a rule with depth >= 25 in the + * associated TBL8 group. + */ + + tbl8_group_index = lpm->tbl24[i].group_idx; + tbl8_index = tbl8_group_index * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + for (j = tbl8_index; j < (tbl8_index + + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); j++) { + + if (lpm->tbl8[j].depth <= depth) + lpm->tbl8[j] = new_tbl8_entry; + } + } + } + } +#undef group_idx + return 0; +} + +/* + * Checks if table 8 group can be recycled. + * + * Return of -EEXIST means tbl8 is in use and thus can not be recycled. + * Return of -EINVAL means tbl8 is empty and thus can be recycled + * Return of value > -1 means tbl8 is in use but has all the same values and + * thus can be recycled + */ +static inline int32_t +tbl8_recycle_check_v20(struct rte_lpm_tbl_entry_v20 *tbl8, + uint32_t tbl8_group_start) +{ + uint32_t tbl8_group_end, i; + tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* + * Check the first entry of the given tbl8. If it is invalid we know + * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH + * (As they would affect all entries in a tbl8) and thus this table + * can not be recycled. + */ + if (tbl8[tbl8_group_start].valid) { + /* + * If first entry is valid check if the depth is less than 24 + * and if so check the rest of the entries to verify that they + * are all of this depth. + */ + if (tbl8[tbl8_group_start].depth < MAX_DEPTH_TBL24) { + for (i = (tbl8_group_start + 1); i < tbl8_group_end; + i++) { + + if (tbl8[i].depth != + tbl8[tbl8_group_start].depth) { + + return -EEXIST; + } + } + /* If all entries are the same return the tb8 index */ + return tbl8_group_start; + } + + return -EEXIST; + } + /* + * If the first entry is invalid check if the rest of the entries in + * the tbl8 are invalid. + */ + for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) { + if (tbl8[i].valid) + return -EEXIST; + } + /* If no valid entries are found then return -EINVAL. */ + return -EINVAL; +} + +static inline int32_t +tbl8_recycle_check_v1604(struct rte_lpm_tbl_entry *tbl8, + uint32_t tbl8_group_start) +{ + uint32_t tbl8_group_end, i; + tbl8_group_end = tbl8_group_start + RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + + /* + * Check the first entry of the given tbl8. If it is invalid we know + * this tbl8 does not contain any rule with a depth < RTE_LPM_MAX_DEPTH + * (As they would affect all entries in a tbl8) and thus this table + * can not be recycled. + */ + if (tbl8[tbl8_group_start].valid) { + /* + * If first entry is valid check if the depth is less than 24 + * and if so check the rest of the entries to verify that they + * are all of this depth. + */ + if (tbl8[tbl8_group_start].depth < MAX_DEPTH_TBL24) { + for (i = (tbl8_group_start + 1); i < tbl8_group_end; + i++) { + + if (tbl8[i].depth != + tbl8[tbl8_group_start].depth) { + + return -EEXIST; + } + } + /* If all entries are the same return the tb8 index */ + return tbl8_group_start; + } + + return -EEXIST; + } + /* + * If the first entry is invalid check if the rest of the entries in + * the tbl8 are invalid. + */ + for (i = (tbl8_group_start + 1); i < tbl8_group_end; i++) { + if (tbl8[i].valid) + return -EEXIST; + } + /* If no valid entries are found then return -EINVAL. */ + return -EINVAL; +} + +static inline int32_t +delete_depth_big_v20(struct rte_lpm_v20 *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ + uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index, + tbl8_range, i; + int32_t tbl8_recycle_index; + + /* + * Calculate the index into tbl24 and range. Note: All depths larger + * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry. + */ + tbl24_index = ip_masked >> 8; + + /* Calculate the index into tbl8 and range. */ + tbl8_group_index = lpm->tbl24[tbl24_index].group_idx; + tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + tbl8_range = depth_to_range(depth); + + if (sub_rule_index < 0) { + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be removed or modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i].valid = INVALID; + } + } else { + /* Set new tbl8 entry. */ + struct rte_lpm_tbl_entry_v20 new_tbl8_entry = { + .valid = VALID, + .depth = sub_rule_depth, + .valid_group = lpm->tbl8[tbl8_group_start].valid_group, + }; + + new_tbl8_entry.next_hop = + lpm->rules_tbl[sub_rule_index].next_hop; + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i] = new_tbl8_entry; + } + } + + /* + * Check if there are any valid entries in this tbl8 group. If all + * tbl8 entries are invalid we can free the tbl8 and invalidate the + * associated tbl24 entry. + */ + + tbl8_recycle_index = tbl8_recycle_check_v20(lpm->tbl8, tbl8_group_start); + + if (tbl8_recycle_index == -EINVAL) { + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index].valid = 0; + tbl8_free_v20(lpm->tbl8, tbl8_group_start); + } else if (tbl8_recycle_index > -1) { + /* Update tbl24 entry. */ + struct rte_lpm_tbl_entry_v20 new_tbl24_entry = { + { .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop, }, + .valid = VALID, + .valid_group = 0, + .depth = lpm->tbl8[tbl8_recycle_index].depth, + }; + + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index] = new_tbl24_entry; + tbl8_free_v20(lpm->tbl8, tbl8_group_start); + } + + return 0; +} + +static inline int32_t +delete_depth_big_v1604(struct rte_lpm *lpm, uint32_t ip_masked, + uint8_t depth, int32_t sub_rule_index, uint8_t sub_rule_depth) +{ +#define group_idx next_hop + uint32_t tbl24_index, tbl8_group_index, tbl8_group_start, tbl8_index, + tbl8_range, i; + int32_t tbl8_recycle_index; + + /* + * Calculate the index into tbl24 and range. Note: All depths larger + * than MAX_DEPTH_TBL24 are associated with only one tbl24 entry. + */ + tbl24_index = ip_masked >> 8; + + /* Calculate the index into tbl8 and range. */ + tbl8_group_index = lpm->tbl24[tbl24_index].group_idx; + tbl8_group_start = tbl8_group_index * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + tbl8_index = tbl8_group_start + (ip_masked & 0xFF); + tbl8_range = depth_to_range(depth); + + if (sub_rule_index < 0) { + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be removed or modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i].valid = INVALID; + } + } else { + /* Set new tbl8 entry. */ + struct rte_lpm_tbl_entry new_tbl8_entry = { + .valid = VALID, + .depth = sub_rule_depth, + .valid_group = lpm->tbl8[tbl8_group_start].valid_group, + .next_hop = lpm->rules_tbl[sub_rule_index].next_hop, + }; + + /* + * Loop through the range of entries on tbl8 for which the + * rule_to_delete must be modified. + */ + for (i = tbl8_index; i < (tbl8_index + tbl8_range); i++) { + if (lpm->tbl8[i].depth <= depth) + lpm->tbl8[i] = new_tbl8_entry; + } + } + + /* + * Check if there are any valid entries in this tbl8 group. If all + * tbl8 entries are invalid we can free the tbl8 and invalidate the + * associated tbl24 entry. + */ + + tbl8_recycle_index = tbl8_recycle_check_v1604(lpm->tbl8, tbl8_group_start); + + if (tbl8_recycle_index == -EINVAL) { + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index].valid = 0; + tbl8_free_v1604(lpm->tbl8, tbl8_group_start); + } else if (tbl8_recycle_index > -1) { + /* Update tbl24 entry. */ + struct rte_lpm_tbl_entry new_tbl24_entry = { + .next_hop = lpm->tbl8[tbl8_recycle_index].next_hop, + .valid = VALID, + .valid_group = 0, + .depth = lpm->tbl8[tbl8_recycle_index].depth, + }; + + /* Set tbl24 before freeing tbl8 to avoid race condition. */ + lpm->tbl24[tbl24_index] = new_tbl24_entry; + tbl8_free_v1604(lpm->tbl8, tbl8_group_start); + } +#undef group_idx + return 0; +} + +/* + * Deletes a rule + */ +int +rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth) +{ + int32_t rule_to_delete_index, sub_rule_index; + uint32_t ip_masked; + uint8_t sub_rule_depth; + /* + * Check input arguments. Note: IP must be a positive integer of 32 + * bits in length therefore it need not be checked. + */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) { + return -EINVAL; + } + + ip_masked = ip & depth_to_mask(depth); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find_v20(lpm, ip_masked, depth); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -EINVAL. + */ + if (rule_to_delete_index < 0) + return -EINVAL; + + /* Delete the rule from the rule table. */ + rule_delete_v20(lpm, rule_to_delete_index, depth); + + /* + * Find rule to replace the rule_to_delete. If there is no rule to + * replace the rule_to_delete we return -1 and invalidate the table + * entries associated with this rule. + */ + sub_rule_depth = 0; + sub_rule_index = find_previous_rule_v20(lpm, ip, depth, &sub_rule_depth); + + /* + * If the input depth value is less than 25 use function + * delete_depth_small otherwise use delete_depth_big. + */ + if (depth <= MAX_DEPTH_TBL24) { + return delete_depth_small_v20(lpm, ip_masked, depth, + sub_rule_index, sub_rule_depth); + } else { /* If depth > MAX_DEPTH_TBL24 */ + return delete_depth_big_v20(lpm, ip_masked, depth, sub_rule_index, + sub_rule_depth); + } +} +VERSION_SYMBOL(rte_lpm_delete, _v20, 2.0); + +int +rte_lpm_delete_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth) +{ + int32_t rule_to_delete_index, sub_rule_index; + uint32_t ip_masked; + uint8_t sub_rule_depth; + /* + * Check input arguments. Note: IP must be a positive integer of 32 + * bits in length therefore it need not be checked. + */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM_MAX_DEPTH)) { + return -EINVAL; + } + + ip_masked = ip & depth_to_mask(depth); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find_v1604(lpm, ip_masked, depth); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -EINVAL. + */ + if (rule_to_delete_index < 0) + return -EINVAL; + + /* Delete the rule from the rule table. */ + rule_delete_v1604(lpm, rule_to_delete_index, depth); + + /* + * Find rule to replace the rule_to_delete. If there is no rule to + * replace the rule_to_delete we return -1 and invalidate the table + * entries associated with this rule. + */ + sub_rule_depth = 0; + sub_rule_index = find_previous_rule_v1604(lpm, ip, depth, &sub_rule_depth); + + /* + * If the input depth value is less than 25 use function + * delete_depth_small otherwise use delete_depth_big. + */ + if (depth <= MAX_DEPTH_TBL24) { + return delete_depth_small_v1604(lpm, ip_masked, depth, + sub_rule_index, sub_rule_depth); + } else { /* If depth > MAX_DEPTH_TBL24 */ + return delete_depth_big_v1604(lpm, ip_masked, depth, sub_rule_index, + sub_rule_depth); + } +} +BIND_DEFAULT_SYMBOL(rte_lpm_delete, _v1604, 16.04); +MAP_STATIC_SYMBOL(int rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, + uint8_t depth), rte_lpm_delete_v1604); + +/* + * Delete all rules from the LPM table. + */ +void +rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm) +{ + /* Zero rule information. */ + memset(lpm->rule_info, 0, sizeof(lpm->rule_info)); + + /* Zero tbl24. */ + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + + /* Zero tbl8. */ + memset(lpm->tbl8, 0, sizeof(lpm->tbl8)); + + /* Delete all rules form the rules table. */ + memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules); +} +VERSION_SYMBOL(rte_lpm_delete_all, _v20, 2.0); + +void +rte_lpm_delete_all_v1604(struct rte_lpm *lpm) +{ + /* Zero rule information. */ + memset(lpm->rule_info, 0, sizeof(lpm->rule_info)); + + /* Zero tbl24. */ + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + + /* Zero tbl8. */ + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) + * RTE_LPM_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* Delete all rules form the rules table. */ + memset(lpm->rules_tbl, 0, sizeof(lpm->rules_tbl[0]) * lpm->max_rules); +} +BIND_DEFAULT_SYMBOL(rte_lpm_delete_all, _v1604, 16.04); +MAP_STATIC_SYMBOL(void rte_lpm_delete_all(struct rte_lpm *lpm), + rte_lpm_delete_all_v1604); diff --git a/lib/librte_lpm/rte_lpm.h b/lib/librte_lpm/rte_lpm.h new file mode 100644 index 00000000..2df1d672 --- /dev/null +++ b/lib/librte_lpm/rte_lpm.h @@ -0,0 +1,491 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LPM_H_ +#define _RTE_LPM_H_ + +/** + * @file + * RTE Longest Prefix Match (LPM) + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** Max number of characters in LPM name. */ +#define RTE_LPM_NAMESIZE 32 + +/** Maximum depth value possible for IPv4 LPM. */ +#define RTE_LPM_MAX_DEPTH 32 + +/** @internal Total number of tbl24 entries. */ +#define RTE_LPM_TBL24_NUM_ENTRIES (1 << 24) + +/** @internal Number of entries in a tbl8 group. */ +#define RTE_LPM_TBL8_GROUP_NUM_ENTRIES 256 + +/** @internal Max number of tbl8 groups in the tbl8. */ +#define RTE_LPM_MAX_TBL8_NUM_GROUPS (1 << 24) + +/** @internal Total number of tbl8 groups in the tbl8. */ +#define RTE_LPM_TBL8_NUM_GROUPS 256 + +/** @internal Total number of tbl8 entries. */ +#define RTE_LPM_TBL8_NUM_ENTRIES (RTE_LPM_TBL8_NUM_GROUPS * \ + RTE_LPM_TBL8_GROUP_NUM_ENTRIES) + +/** @internal Macro to enable/disable run-time checks. */ +#if defined(RTE_LIBRTE_LPM_DEBUG) +#define RTE_LPM_RETURN_IF_TRUE(cond, retval) do { \ + if (cond) return (retval); \ +} while (0) +#else +#define RTE_LPM_RETURN_IF_TRUE(cond, retval) +#endif + +/** @internal bitmask with valid and valid_group fields set */ +#define RTE_LPM_VALID_EXT_ENTRY_BITMASK 0x03000000 + +/** Bitmask used to indicate successful lookup */ +#define RTE_LPM_LOOKUP_SUCCESS 0x01000000 + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN +/** @internal Tbl24 entry structure. */ +struct rte_lpm_tbl_entry_v20 { + /** + * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or + * a group index pointing to a tbl8 structure (tbl24 only, when + * valid_group is set) + */ + union { + uint8_t next_hop; + uint8_t group_idx; + }; + /* Using single uint8_t to store 3 values. */ + uint8_t valid :1; /**< Validation flag. */ + /** + * For tbl24: + * - valid_group == 0: entry stores a next hop + * - valid_group == 1: entry stores a group_index pointing to a tbl8 + * For tbl8: + * - valid_group indicates whether the current tbl8 is in use or not + */ + uint8_t valid_group :1; + uint8_t depth :6; /**< Rule depth. */ +}; + +struct rte_lpm_tbl_entry { + /** + * Stores Next hop (tbl8 or tbl24 when valid_group is not set) or + * a group index pointing to a tbl8 structure (tbl24 only, when + * valid_group is set) + */ + uint32_t next_hop :24; + /* Using single uint8_t to store 3 values. */ + uint32_t valid :1; /**< Validation flag. */ + /** + * For tbl24: + * - valid_group == 0: entry stores a next hop + * - valid_group == 1: entry stores a group_index pointing to a tbl8 + * For tbl8: + * - valid_group indicates whether the current tbl8 is in use or not + */ + uint32_t valid_group :1; + uint32_t depth :6; /**< Rule depth. */ +}; + +#else +struct rte_lpm_tbl_entry_v20 { + uint8_t depth :6; + uint8_t valid_group :1; + uint8_t valid :1; + union { + uint8_t group_idx; + uint8_t next_hop; + }; +}; + +struct rte_lpm_tbl_entry { + uint32_t depth :6; + uint32_t valid_group :1; + uint32_t valid :1; + uint32_t next_hop :24; + +}; + +#endif + +/** LPM configuration structure. */ +struct rte_lpm_config { + uint32_t max_rules; /**< Max number of rules. */ + uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ + int flags; /**< This field is currently unused. */ +}; + +/** @internal Rule structure. */ +struct rte_lpm_rule_v20 { + uint32_t ip; /**< Rule IP address. */ + uint8_t next_hop; /**< Rule next hop. */ +}; + +struct rte_lpm_rule { + uint32_t ip; /**< Rule IP address. */ + uint32_t next_hop; /**< Rule next hop. */ +}; + +/** @internal Contains metadata about the rules table. */ +struct rte_lpm_rule_info { + uint32_t used_rules; /**< Used rules so far. */ + uint32_t first_rule; /**< Indexes the first rule of a given depth. */ +}; + +/** @internal LPM structure. */ +struct rte_lpm_v20 { + /* LPM metadata. */ + char name[RTE_LPM_NAMESIZE]; /**< Name of the lpm. */ + uint32_t max_rules; /**< Max. balanced rules per lpm. */ + struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */ + + /* LPM Tables. */ + struct rte_lpm_tbl_entry_v20 tbl24[RTE_LPM_TBL24_NUM_ENTRIES] + __rte_cache_aligned; /**< LPM tbl24 table. */ + struct rte_lpm_tbl_entry_v20 tbl8[RTE_LPM_TBL8_NUM_ENTRIES] + __rte_cache_aligned; /**< LPM tbl8 table. */ + struct rte_lpm_rule_v20 rules_tbl[0] \ + __rte_cache_aligned; /**< LPM rules. */ +}; + +struct rte_lpm { + /* LPM metadata. */ + char name[RTE_LPM_NAMESIZE]; /**< Name of the lpm. */ + uint32_t max_rules; /**< Max. balanced rules per lpm. */ + uint32_t number_tbl8s; /**< Number of tbl8s. */ + struct rte_lpm_rule_info rule_info[RTE_LPM_MAX_DEPTH]; /**< Rule info table. */ + + /* LPM Tables. */ + struct rte_lpm_tbl_entry tbl24[RTE_LPM_TBL24_NUM_ENTRIES] + __rte_cache_aligned; /**< LPM tbl24 table. */ + struct rte_lpm_tbl_entry *tbl8; /**< LPM tbl8 table. */ + struct rte_lpm_rule *rules_tbl; /**< LPM rules. */ +}; + +/** + * Create an LPM object. + * + * @param name + * LPM object name + * @param socket_id + * NUMA socket ID for LPM table memory allocation + * @param config + * Structure containing the configuration + * @return + * Handle to LPM object on success, NULL otherwise with rte_errno set + * to an appropriate values. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_lpm * +rte_lpm_create(const char *name, int socket_id, + const struct rte_lpm_config *config); +struct rte_lpm_v20 * +rte_lpm_create_v20(const char *name, int socket_id, int max_rules, int flags); +struct rte_lpm * +rte_lpm_create_v1604(const char *name, int socket_id, + const struct rte_lpm_config *config); + +/** + * Find an existing LPM object and return a pointer to it. + * + * @param name + * Name of the lpm object as passed to rte_lpm_create() + * @return + * Pointer to lpm object or NULL if object not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_lpm * +rte_lpm_find_existing(const char *name); +struct rte_lpm_v20 * +rte_lpm_find_existing_v20(const char *name); +struct rte_lpm * +rte_lpm_find_existing_v1604(const char *name); + +/** + * Free an LPM object. + * + * @param lpm + * LPM object handle + * @return + * None + */ +void +rte_lpm_free(struct rte_lpm *lpm); +void +rte_lpm_free_v20(struct rte_lpm_v20 *lpm); +void +rte_lpm_free_v1604(struct rte_lpm *lpm); + +/** + * Add a rule to the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be added to the LPM table + * @param depth + * Depth of the rule to be added to the LPM table + * @param next_hop + * Next hop of the rule to be added to the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm_add(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, uint32_t next_hop); +int +rte_lpm_add_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, + uint8_t next_hop); +int +rte_lpm_add_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, + uint32_t next_hop); + +/** + * Check if a rule is present in the LPM table, + * and provide its next hop if it is. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be searched + * @param depth + * Depth of the rule to searched + * @param next_hop + * Next hop of the rule (valid only if it is found) + * @return + * 1 if the rule exists, 0 if it does not, a negative value on failure + */ +int +rte_lpm_is_rule_present(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, +uint32_t *next_hop); +int +rte_lpm_is_rule_present_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth, +uint8_t *next_hop); +int +rte_lpm_is_rule_present_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth, +uint32_t *next_hop); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be deleted from the LPM table + * @param depth + * Depth of the rule to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm_delete(struct rte_lpm *lpm, uint32_t ip, uint8_t depth); +int +rte_lpm_delete_v20(struct rte_lpm_v20 *lpm, uint32_t ip, uint8_t depth); +int +rte_lpm_delete_v1604(struct rte_lpm *lpm, uint32_t ip, uint8_t depth); + +/** + * Delete all rules from the LPM table. + * + * @param lpm + * LPM object handle + */ +void +rte_lpm_delete_all(struct rte_lpm *lpm); +void +rte_lpm_delete_all_v20(struct rte_lpm_v20 *lpm); +void +rte_lpm_delete_all_v1604(struct rte_lpm *lpm); + +/** + * Lookup an IP into the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP to be looked up in the LPM table + * @param next_hop + * Next hop of the most specific rule found for IP (valid on lookup hit only) + * @return + * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit + */ +static inline int +rte_lpm_lookup(struct rte_lpm *lpm, uint32_t ip, uint32_t *next_hop) +{ + unsigned tbl24_index = (ip >> 8); + uint32_t tbl_entry; + const uint32_t *ptbl; + + /* DEBUG: Check user input arguments. */ + RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (next_hop == NULL)), -EINVAL); + + /* Copy tbl24 entry */ + ptbl = (const uint32_t *)(&lpm->tbl24[tbl24_index]); + tbl_entry = *ptbl; + + /* Copy tbl8 entry (only if needed) */ + if (unlikely((tbl_entry & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + + unsigned tbl8_index = (uint8_t)ip + + (((uint32_t)tbl_entry & 0x00FFFFFF) * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + + ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index]; + tbl_entry = *ptbl; + } + + *next_hop = ((uint32_t)tbl_entry & 0x00FFFFFF); + return (tbl_entry & RTE_LPM_LOOKUP_SUCCESS) ? 0 : -ENOENT; +} + +/** + * Lookup multiple IP addresses in an LPM table. This may be implemented as a + * macro, so the address of the function should not be used. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be looked up in the LPM table + * @param next_hops + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an array of two byte values. The most significant byte in each + * value says whether the lookup was successful (bitmask + * RTE_LPM_LOOKUP_SUCCESS is set). The least significant byte is the + * actual next hop. + * @param n + * Number of elements in ips (and next_hops) array to lookup. This should be a + * compile time constant, and divisible by 8 for best performance. + * @return + * -EINVAL for incorrect arguments, otherwise 0 + */ +#define rte_lpm_lookup_bulk(lpm, ips, next_hops, n) \ + rte_lpm_lookup_bulk_func(lpm, ips, next_hops, n) + +static inline int +rte_lpm_lookup_bulk_func(const struct rte_lpm *lpm, const uint32_t *ips, + uint32_t *next_hops, const unsigned n) +{ + unsigned i; + unsigned tbl24_indexes[n]; + const uint32_t *ptbl; + + /* DEBUG: Check user input arguments. */ + RTE_LPM_RETURN_IF_TRUE(((lpm == NULL) || (ips == NULL) || + (next_hops == NULL)), -EINVAL); + + for (i = 0; i < n; i++) { + tbl24_indexes[i] = ips[i] >> 8; + } + + for (i = 0; i < n; i++) { + /* Simply copy tbl24 entry to output */ + ptbl = (const uint32_t *)&lpm->tbl24[tbl24_indexes[i]]; + next_hops[i] = *ptbl; + + /* Overwrite output with tbl8 entry if needed */ + if (unlikely((next_hops[i] & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + + unsigned tbl8_index = (uint8_t)ips[i] + + (((uint32_t)next_hops[i] & 0x00FFFFFF) * + RTE_LPM_TBL8_GROUP_NUM_ENTRIES); + + ptbl = (const uint32_t *)&lpm->tbl8[tbl8_index]; + next_hops[i] = *ptbl; + } + } + return 0; +} + +/* Mask four results. */ +#define RTE_LPM_MASKX4_RES UINT64_C(0x00ffffff00ffffff) + +/** + * Lookup four IP addresses in an LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * Four IPs to be looked up in the LPM table + * @param hop + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an 4 elements array of two byte values. + * If the lookup was succesfull for the given IP, then least significant byte + * of the corresponding element is the actual next hop and the most + * significant byte is zero. + * If the lookup for the given IP failed, then corresponding element would + * contain default value, see description of then next parameter. + * @param defv + * Default value to populate into corresponding element of hop[] array, + * if lookup would fail. + */ +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv); + +#if defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) +#include "rte_lpm_neon.h" +#else +#include "rte_lpm_sse.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_H_ */ diff --git a/lib/librte_lpm/rte_lpm6.c b/lib/librte_lpm/rte_lpm6.c new file mode 100644 index 00000000..ba4353ce --- /dev/null +++ b/lib/librte_lpm/rte_lpm6.c @@ -0,0 +1,883 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_lpm6.h" + +#define RTE_LPM6_TBL24_NUM_ENTRIES (1 << 24) +#define RTE_LPM6_TBL8_GROUP_NUM_ENTRIES 256 +#define RTE_LPM6_TBL8_MAX_NUM_GROUPS (1 << 21) + +#define RTE_LPM6_VALID_EXT_ENTRY_BITMASK 0xA0000000 +#define RTE_LPM6_LOOKUP_SUCCESS 0x20000000 +#define RTE_LPM6_TBL8_BITMASK 0x001FFFFF + +#define ADD_FIRST_BYTE 3 +#define LOOKUP_FIRST_BYTE 4 +#define BYTE_SIZE 8 +#define BYTES2_SIZE 16 + +#define lpm6_tbl8_gindex next_hop + +/** Flags for setting an entry as valid/invalid. */ +enum valid_flag { + INVALID = 0, + VALID +}; + +TAILQ_HEAD(rte_lpm6_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_lpm6_tailq = { + .name = "RTE_LPM6", +}; +EAL_REGISTER_TAILQ(rte_lpm6_tailq) + +/** Tbl entry structure. It is the same for both tbl24 and tbl8 */ +struct rte_lpm6_tbl_entry { + uint32_t next_hop: 21; /**< Next hop / next table to be checked. */ + uint32_t depth :8; /**< Rule depth. */ + + /* Flags. */ + uint32_t valid :1; /**< Validation flag. */ + uint32_t valid_group :1; /**< Group validation flag. */ + uint32_t ext_entry :1; /**< External entry. */ +}; + +/** Rules tbl entry structure. */ +struct rte_lpm6_rule { + uint8_t ip[RTE_LPM6_IPV6_ADDR_SIZE]; /**< Rule IP address. */ + uint8_t next_hop; /**< Rule next hop. */ + uint8_t depth; /**< Rule depth. */ +}; + +/** LPM6 structure. */ +struct rte_lpm6 { + /* LPM metadata. */ + char name[RTE_LPM6_NAMESIZE]; /**< Name of the lpm. */ + uint32_t max_rules; /**< Max number of rules. */ + uint32_t used_rules; /**< Used rules so far. */ + uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ + uint32_t next_tbl8; /**< Next tbl8 to be used. */ + + /* LPM Tables. */ + struct rte_lpm6_rule *rules_tbl; /**< LPM rules. */ + struct rte_lpm6_tbl_entry tbl24[RTE_LPM6_TBL24_NUM_ENTRIES] + __rte_cache_aligned; /**< LPM tbl24 table. */ + struct rte_lpm6_tbl_entry tbl8[0] + __rte_cache_aligned; /**< LPM tbl8 table. */ +}; + +/* + * Takes an array of uint8_t (IPv6 address) and masks it using the depth. + * It leaves untouched one bit per unit in the depth variable + * and set the rest to 0. + */ +static inline void +mask_ip(uint8_t *ip, uint8_t depth) +{ + int16_t part_depth, mask; + int i; + + part_depth = depth; + + for (i = 0; i < RTE_LPM6_IPV6_ADDR_SIZE; i++) { + if (part_depth < BYTE_SIZE && part_depth >= 0) { + mask = (uint16_t)(~(UINT8_MAX >> part_depth)); + ip[i] = (uint8_t)(ip[i] & mask); + } else if (part_depth < 0) { + ip[i] = 0; + } + part_depth -= BYTE_SIZE; + } +} + +/* + * Allocates memory for LPM object + */ +struct rte_lpm6 * +rte_lpm6_create(const char *name, int socket_id, + const struct rte_lpm6_config *config) +{ + char mem_name[RTE_LPM6_NAMESIZE]; + struct rte_lpm6 *lpm = NULL; + struct rte_tailq_entry *te; + uint64_t mem_size, rules_size; + struct rte_lpm6_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list); + + RTE_BUILD_BUG_ON(sizeof(struct rte_lpm6_tbl_entry) != sizeof(uint32_t)); + + /* Check user arguments. */ + if ((name == NULL) || (socket_id < -1) || (config == NULL) || + (config->max_rules == 0) || + config->number_tbl8s > RTE_LPM6_TBL8_MAX_NUM_GROUPS) { + rte_errno = EINVAL; + return NULL; + } + + snprintf(mem_name, sizeof(mem_name), "LPM_%s", name); + + /* Determine the amount of memory to allocate. */ + mem_size = sizeof(*lpm) + (sizeof(lpm->tbl8[0]) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * config->number_tbl8s); + rules_size = sizeof(struct rte_lpm6_rule) * config->max_rules; + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* Guarantee there's no existing */ + TAILQ_FOREACH(te, lpm_list, next) { + lpm = (struct rte_lpm6 *) te->data; + if (strncmp(name, lpm->name, RTE_LPM6_NAMESIZE) == 0) + break; + } + lpm = NULL; + if (te != NULL) { + rte_errno = EEXIST; + goto exit; + } + + /* allocate tailq entry */ + te = rte_zmalloc("LPM6_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, LPM, "Failed to allocate tailq entry!\n"); + goto exit; + } + + /* Allocate memory to store the LPM data structures. */ + lpm = (struct rte_lpm6 *)rte_zmalloc_socket(mem_name, (size_t)mem_size, + RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm == NULL) { + RTE_LOG(ERR, LPM, "LPM memory allocation failed\n"); + rte_free(te); + goto exit; + } + + lpm->rules_tbl = (struct rte_lpm6_rule *)rte_zmalloc_socket(NULL, + (size_t)rules_size, RTE_CACHE_LINE_SIZE, socket_id); + + if (lpm->rules_tbl == NULL) { + RTE_LOG(ERR, LPM, "LPM rules_tbl allocation failed\n"); + rte_free(lpm); + lpm = NULL; + rte_free(te); + goto exit; + } + + /* Save user arguments. */ + lpm->max_rules = config->max_rules; + lpm->number_tbl8s = config->number_tbl8s; + snprintf(lpm->name, sizeof(lpm->name), "%s", name); + + te->data = (void *) lpm; + + TAILQ_INSERT_TAIL(lpm_list, te, next); + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return lpm; +} + +/* + * Find an existing lpm table and return a pointer to it. + */ +struct rte_lpm6 * +rte_lpm6_find_existing(const char *name) +{ + struct rte_lpm6 *l = NULL; + struct rte_tailq_entry *te; + struct rte_lpm6_list *lpm_list; + + lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, lpm_list, next) { + l = (struct rte_lpm6 *) te->data; + if (strncmp(name, l->name, RTE_LPM6_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return l; +} + +/* + * Deallocates memory for given LPM table. + */ +void +rte_lpm6_free(struct rte_lpm6 *lpm) +{ + struct rte_lpm6_list *lpm_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (lpm == NULL) + return; + + lpm_list = RTE_TAILQ_CAST(rte_lpm6_tailq.head, rte_lpm6_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, lpm_list, next) { + if (te->data == (void *) lpm) + break; + } + + if (te != NULL) + TAILQ_REMOVE(lpm_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(lpm->rules_tbl); + rte_free(lpm); + rte_free(te); +} + +/* + * Checks if a rule already exists in the rules table and updates + * the nexthop if so. Otherwise it adds a new rule if enough space is available. + */ +static inline int32_t +rule_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t next_hop, uint8_t depth) +{ + uint32_t rule_index; + + /* Scan through rule list to see if rule already exists. */ + for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { + + /* If rule already exists update its next_hop and return. */ + if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, + RTE_LPM6_IPV6_ADDR_SIZE) == 0) && + lpm->rules_tbl[rule_index].depth == depth) { + lpm->rules_tbl[rule_index].next_hop = next_hop; + + return rule_index; + } + } + + /* + * If rule does not exist check if there is space to add a new rule to + * this rule group. If there is no space return error. + */ + if (lpm->used_rules == lpm->max_rules) { + return -ENOSPC; + } + + /* If there is space for the new rule add it. */ + rte_memcpy(lpm->rules_tbl[rule_index].ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); + lpm->rules_tbl[rule_index].next_hop = next_hop; + lpm->rules_tbl[rule_index].depth = depth; + + /* Increment the used rules counter for this rule group. */ + lpm->used_rules++; + + return rule_index; +} + +/* + * Function that expands a rule across the data structure when a less-generic + * one has been added before. It assures that every possible combination of bits + * in the IP address returns a match. + */ +static void +expand_rule(struct rte_lpm6 *lpm, uint32_t tbl8_gindex, uint8_t depth, + uint8_t next_hop) +{ + uint32_t tbl8_group_end, tbl8_gindex_next, j; + + tbl8_group_end = tbl8_gindex + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + + struct rte_lpm6_tbl_entry new_tbl8_entry = { + .valid = VALID, + .valid_group = VALID, + .depth = depth, + .next_hop = next_hop, + .ext_entry = 0, + }; + + for (j = tbl8_gindex; j < tbl8_group_end; j++) { + if (!lpm->tbl8[j].valid || (lpm->tbl8[j].ext_entry == 0 + && lpm->tbl8[j].depth <= depth)) { + + lpm->tbl8[j] = new_tbl8_entry; + + } else if (lpm->tbl8[j].ext_entry == 1) { + + tbl8_gindex_next = lpm->tbl8[j].lpm6_tbl8_gindex + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + expand_rule(lpm, tbl8_gindex_next, depth, next_hop); + } + } +} + +/* + * Partially adds a new route to the data structure (tbl24+tbl8s). + * It returns 0 on success, a negative number on failure, or 1 if + * the process needs to be continued by calling the function again. + */ +static inline int +add_step(struct rte_lpm6 *lpm, struct rte_lpm6_tbl_entry *tbl, + struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, uint8_t bytes, + uint8_t first_byte, uint8_t depth, uint8_t next_hop) +{ + uint32_t tbl_index, tbl_range, tbl8_group_start, tbl8_group_end, i; + int32_t tbl8_gindex; + int8_t bitshift; + uint8_t bits_covered; + + /* + * Calculate index to the table based on the number and position + * of the bytes being inspected in this step. + */ + tbl_index = 0; + for (i = first_byte; i < (uint32_t)(first_byte + bytes); i++) { + bitshift = (int8_t)((bytes - i)*BYTE_SIZE); + + if (bitshift < 0) bitshift = 0; + tbl_index = tbl_index | ip[i-1] << bitshift; + } + + /* Number of bits covered in this step */ + bits_covered = (uint8_t)((bytes+first_byte-1)*BYTE_SIZE); + + /* + * If depth if smaller than this number (ie this is the last step) + * expand the rule across the relevant positions in the table. + */ + if (depth <= bits_covered) { + tbl_range = 1 << (bits_covered - depth); + + for (i = tbl_index; i < (tbl_index + tbl_range); i++) { + if (!tbl[i].valid || (tbl[i].ext_entry == 0 && + tbl[i].depth <= depth)) { + + struct rte_lpm6_tbl_entry new_tbl_entry = { + .next_hop = next_hop, + .depth = depth, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 0, + }; + + tbl[i] = new_tbl_entry; + + } else if (tbl[i].ext_entry == 1) { + + /* + * If tbl entry is valid and extended calculate the index + * into next tbl8 and expand the rule across the data structure. + */ + tbl8_gindex = tbl[i].lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + expand_rule(lpm, tbl8_gindex, depth, next_hop); + } + } + + return 0; + } + /* + * If this is not the last step just fill one position + * and calculate the index to the next table. + */ + else { + /* If it's invalid a new tbl8 is needed */ + if (!tbl[tbl_index].valid) { + if (lpm->next_tbl8 < lpm->number_tbl8s) + tbl8_gindex = (lpm->next_tbl8)++; + else + return -ENOSPC; + + struct rte_lpm6_tbl_entry new_tbl_entry = { + .lpm6_tbl8_gindex = tbl8_gindex, + .depth = 0, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 1, + }; + + tbl[tbl_index] = new_tbl_entry; + } + /* + * If it's valid but not extended the rule that was stored * + * here needs to be moved to the next table. + */ + else if (tbl[tbl_index].ext_entry == 0) { + /* Search for free tbl8 group. */ + if (lpm->next_tbl8 < lpm->number_tbl8s) + tbl8_gindex = (lpm->next_tbl8)++; + else + return -ENOSPC; + + tbl8_group_start = tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + tbl8_group_end = tbl8_group_start + + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES; + + /* Populate new tbl8 with tbl value. */ + for (i = tbl8_group_start; i < tbl8_group_end; i++) { + lpm->tbl8[i].valid = VALID; + lpm->tbl8[i].depth = tbl[tbl_index].depth; + lpm->tbl8[i].next_hop = tbl[tbl_index].next_hop; + lpm->tbl8[i].ext_entry = 0; + } + + /* + * Update tbl entry to point to new tbl8 entry. Note: The + * ext_flag and tbl8_index need to be updated simultaneously, + * so assign whole structure in one go. + */ + struct rte_lpm6_tbl_entry new_tbl_entry = { + .lpm6_tbl8_gindex = tbl8_gindex, + .depth = 0, + .valid = VALID, + .valid_group = VALID, + .ext_entry = 1, + }; + + tbl[tbl_index] = new_tbl_entry; + } + + *tbl_next = &(lpm->tbl8[tbl[tbl_index].lpm6_tbl8_gindex * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES]); + } + + return 1; +} + +/* + * Add a route + */ +int +rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t next_hop) +{ + struct rte_lpm6_tbl_entry *tbl; + struct rte_lpm6_tbl_entry *tbl_next; + int32_t rule_index; + int status; + uint8_t masked_ip[RTE_LPM6_IPV6_ADDR_SIZE]; + int i; + + /* Check user arguments. */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) + return -EINVAL; + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(masked_ip, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(masked_ip, depth); + + /* Add the rule to the rule table. */ + rule_index = rule_add(lpm, masked_ip, next_hop, depth); + + /* If there is no space available for new rule return error. */ + if (rule_index < 0) { + return rule_index; + } + + /* Inspect the first three bytes through tbl24 on the first step. */ + tbl = lpm->tbl24; + status = add_step (lpm, tbl, &tbl_next, masked_ip, ADD_FIRST_BYTE, 1, + depth, next_hop); + if (status < 0) { + rte_lpm6_delete(lpm, masked_ip, depth); + + return status; + } + + /* + * Inspect one by one the rest of the bytes until + * the process is completed. + */ + for (i = ADD_FIRST_BYTE; i < RTE_LPM6_IPV6_ADDR_SIZE && status == 1; i++) { + tbl = tbl_next; + status = add_step (lpm, tbl, &tbl_next, masked_ip, 1, (uint8_t)(i+1), + depth, next_hop); + if (status < 0) { + rte_lpm6_delete(lpm, masked_ip, depth); + + return status; + } + } + + return status; +} + +/* + * Takes a pointer to a table entry and inspect one level. + * The function returns 0 on lookup success, ENOENT if no match was found + * or 1 if the process needs to be continued by calling the function again. + */ +static inline int +lookup_step(const struct rte_lpm6 *lpm, const struct rte_lpm6_tbl_entry *tbl, + const struct rte_lpm6_tbl_entry **tbl_next, uint8_t *ip, + uint8_t first_byte, uint8_t *next_hop) +{ + uint32_t tbl8_index, tbl_entry; + + /* Take the integer value from the pointer. */ + tbl_entry = *(const uint32_t *)tbl; + + /* If it is valid and extended we calculate the new pointer to return. */ + if ((tbl_entry & RTE_LPM6_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM6_VALID_EXT_ENTRY_BITMASK) { + + tbl8_index = ip[first_byte-1] + + ((tbl_entry & RTE_LPM6_TBL8_BITMASK) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES); + + *tbl_next = &lpm->tbl8[tbl8_index]; + + return 1; + } else { + /* If not extended then we can have a match. */ + *next_hop = (uint8_t)tbl_entry; + return (tbl_entry & RTE_LPM6_LOOKUP_SUCCESS) ? 0 : -ENOENT; + } +} + +/* + * Looks up an IP + */ +int +rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop) +{ + const struct rte_lpm6_tbl_entry *tbl; + const struct rte_lpm6_tbl_entry *tbl_next; + int status; + uint8_t first_byte; + uint32_t tbl24_index; + + /* DEBUG: Check user input arguments. */ + if ((lpm == NULL) || (ip == NULL) || (next_hop == NULL)) { + return -EINVAL; + } + + first_byte = LOOKUP_FIRST_BYTE; + tbl24_index = (ip[0] << BYTES2_SIZE) | (ip[1] << BYTE_SIZE) | ip[2]; + + /* Calculate pointer to the first entry to be inspected */ + tbl = &lpm->tbl24[tbl24_index]; + + do { + /* Continue inspecting following levels until success or failure */ + status = lookup_step(lpm, tbl, &tbl_next, ip, first_byte++, next_hop); + tbl = tbl_next; + } while (status == 1); + + return status; +} + +/* + * Looks up a group of IP addresses + */ +int +rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int16_t * next_hops, unsigned n) +{ + unsigned i; + const struct rte_lpm6_tbl_entry *tbl; + const struct rte_lpm6_tbl_entry *tbl_next; + uint32_t tbl24_index; + uint8_t first_byte, next_hop; + int status; + + /* DEBUG: Check user input arguments. */ + if ((lpm == NULL) || (ips == NULL) || (next_hops == NULL)) { + return -EINVAL; + } + + for (i = 0; i < n; i++) { + first_byte = LOOKUP_FIRST_BYTE; + tbl24_index = (ips[i][0] << BYTES2_SIZE) | + (ips[i][1] << BYTE_SIZE) | ips[i][2]; + + /* Calculate pointer to the first entry to be inspected */ + tbl = &lpm->tbl24[tbl24_index]; + + do { + /* Continue inspecting following levels until success or failure */ + status = lookup_step(lpm, tbl, &tbl_next, ips[i], first_byte++, + &next_hop); + tbl = tbl_next; + } while (status == 1); + + if (status < 0) + next_hops[i] = -1; + else + next_hops[i] = next_hop; + } + + return 0; +} + +/* + * Finds a rule in rule table. + * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. + */ +static inline int32_t +rule_find(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +{ + uint32_t rule_index; + + /* Scan used rules at given depth to find rule. */ + for (rule_index = 0; rule_index < lpm->used_rules; rule_index++) { + /* If rule is found return the rule index. */ + if ((memcmp (lpm->rules_tbl[rule_index].ip, ip, + RTE_LPM6_IPV6_ADDR_SIZE) == 0) && + lpm->rules_tbl[rule_index].depth == depth) { + + return rule_index; + } + } + + /* If rule is not found return -ENOENT. */ + return -ENOENT; +} + +/* + * Look for a rule in the high-level rules table + */ +int +rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, +uint8_t *next_hop) +{ + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + int32_t rule_index; + + /* Check user arguments. */ + if ((lpm == NULL) || next_hop == NULL || ip == NULL || + (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) + return -EINVAL; + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depth); + + /* Look for the rule using rule_find. */ + rule_index = rule_find(lpm, ip_masked, depth); + + if (rule_index >= 0) { + *next_hop = lpm->rules_tbl[rule_index].next_hop; + return 1; + } + + /* If rule is not found return 0. */ + return 0; +} + +/* + * Delete a rule from the rule table. + * NOTE: Valid range for depth parameter is 1 .. 128 inclusive. + */ +static inline void +rule_delete(struct rte_lpm6 *lpm, int32_t rule_index) +{ + /* + * Overwrite redundant rule with last rule in group and decrement rule + * counter. + */ + lpm->rules_tbl[rule_index] = lpm->rules_tbl[lpm->used_rules-1]; + lpm->used_rules--; +} + +/* + * Deletes a rule + */ +int +rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth) +{ + int32_t rule_to_delete_index; + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + unsigned i; + + /* + * Check input arguments. + */ + if ((lpm == NULL) || (depth < 1) || (depth > RTE_LPM6_MAX_DEPTH)) { + return -EINVAL; + } + + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ip, RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depth); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find(lpm, ip_masked, depth); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -ENOENT. + */ + if (rule_to_delete_index < 0) + return rule_to_delete_index; + + /* Delete the rule from the rule table. */ + rule_delete(lpm, rule_to_delete_index); + + /* + * Set all the table entries to 0 (ie delete every rule + * from the data structure. + */ + lpm->next_tbl8 = 0; + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* + * Add every rule again (except for the one that was removed from + * the rules table). + */ + for (i = 0; i < lpm->used_rules; i++) { + rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, + lpm->rules_tbl[i].next_hop); + } + + return 0; +} + +/* + * Deletes a group of rules + */ +int +rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n) +{ + int32_t rule_to_delete_index; + uint8_t ip_masked[RTE_LPM6_IPV6_ADDR_SIZE]; + unsigned i; + + /* + * Check input arguments. + */ + if ((lpm == NULL) || (ips == NULL) || (depths == NULL)) { + return -EINVAL; + } + + for (i = 0; i < n; i++) { + /* Copy the IP and mask it to avoid modifying user's input data. */ + memcpy(ip_masked, ips[i], RTE_LPM6_IPV6_ADDR_SIZE); + mask_ip(ip_masked, depths[i]); + + /* + * Find the index of the input rule, that needs to be deleted, in the + * rule table. + */ + rule_to_delete_index = rule_find(lpm, ip_masked, depths[i]); + + /* + * Check if rule_to_delete_index was found. If no rule was found the + * function rule_find returns -ENOENT. + */ + if (rule_to_delete_index < 0) + continue; + + /* Delete the rule from the rule table. */ + rule_delete(lpm, rule_to_delete_index); + } + + /* + * Set all the table entries to 0 (ie delete every rule + * from the data structure. + */ + lpm->next_tbl8 = 0; + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) + * RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* + * Add every rule again (except for the ones that were removed from + * the rules table). + */ + for (i = 0; i < lpm->used_rules; i++) { + rte_lpm6_add(lpm, lpm->rules_tbl[i].ip, lpm->rules_tbl[i].depth, + lpm->rules_tbl[i].next_hop); + } + + return 0; +} + +/* + * Delete all rules from the LPM table. + */ +void +rte_lpm6_delete_all(struct rte_lpm6 *lpm) +{ + /* Zero used rules counter. */ + lpm->used_rules = 0; + + /* Zero next tbl8 index. */ + lpm->next_tbl8 = 0; + + /* Zero tbl24. */ + memset(lpm->tbl24, 0, sizeof(lpm->tbl24)); + + /* Zero tbl8. */ + memset(lpm->tbl8, 0, sizeof(lpm->tbl8[0]) * + RTE_LPM6_TBL8_GROUP_NUM_ENTRIES * lpm->number_tbl8s); + + /* Delete all rules form the rules table. */ + memset(lpm->rules_tbl, 0, sizeof(struct rte_lpm6_rule) * lpm->max_rules); +} diff --git a/lib/librte_lpm/rte_lpm6.h b/lib/librte_lpm/rte_lpm6.h new file mode 100644 index 00000000..cedcea8d --- /dev/null +++ b/lib/librte_lpm/rte_lpm6.h @@ -0,0 +1,227 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _RTE_LPM6_H_ +#define _RTE_LPM6_H_ + +/** + * @file + * RTE Longest Prefix Match for IPv6 (LPM6) + */ + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_LPM6_MAX_DEPTH 128 +#define RTE_LPM6_IPV6_ADDR_SIZE 16 +/** Max number of characters in LPM name. */ +#define RTE_LPM6_NAMESIZE 32 + +/** LPM structure. */ +struct rte_lpm6; + +/** LPM configuration structure. */ +struct rte_lpm6_config { + uint32_t max_rules; /**< Max number of rules. */ + uint32_t number_tbl8s; /**< Number of tbl8s to allocate. */ + int flags; /**< This field is currently unused. */ +}; + +/** + * Create an LPM object. + * + * @param name + * LPM object name + * @param socket_id + * NUMA socket ID for LPM table memory allocation + * @param config + * Structure containing the configuration + * @return + * Handle to LPM object on success, NULL otherwise with rte_errno set + * to an appropriate values. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - invalid parameter passed to function + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_lpm6 * +rte_lpm6_create(const char *name, int socket_id, + const struct rte_lpm6_config *config); + +/** + * Find an existing LPM object and return a pointer to it. + * + * @param name + * Name of the lpm object as passed to rte_lpm6_create() + * @return + * Pointer to lpm object or NULL if object not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_lpm6 * +rte_lpm6_find_existing(const char *name); + +/** + * Free an LPM object. + * + * @param lpm + * LPM object handle + * @return + * None + */ +void +rte_lpm6_free(struct rte_lpm6 *lpm); + +/** + * Add a rule to the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be added to the LPM table + * @param depth + * Depth of the rule to be added to the LPM table + * @param next_hop + * Next hop of the rule to be added to the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm6_add(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, + uint8_t next_hop); + +/** + * Check if a rule is present in the LPM table, + * and provide its next hop if it is. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be searched + * @param depth + * Depth of the rule to searched + * @param next_hop + * Next hop of the rule (valid only if it is found) + * @return + * 1 if the rule exists, 0 if it does not, a negative value on failure + */ +int +rte_lpm6_is_rule_present(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth, +uint8_t *next_hop); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP of the rule to be deleted from the LPM table + * @param depth + * Depth of the rule to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise + */ +int +rte_lpm6_delete(struct rte_lpm6 *lpm, uint8_t *ip, uint8_t depth); + +/** + * Delete a rule from the LPM table. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be deleted from the LPM table + * @param depths + * Array of depths of the rules to be deleted from the LPM table + * @param n + * Number of rules to be deleted from the LPM table + * @return + * 0 on success, negative value otherwise. + */ +int +rte_lpm6_delete_bulk_func(struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], uint8_t *depths, unsigned n); + +/** + * Delete all rules from the LPM table. + * + * @param lpm + * LPM object handle + */ +void +rte_lpm6_delete_all(struct rte_lpm6 *lpm); + +/** + * Lookup an IP into the LPM table. + * + * @param lpm + * LPM object handle + * @param ip + * IP to be looked up in the LPM table + * @param next_hop + * Next hop of the most specific rule found for IP (valid on lookup hit only) + * @return + * -EINVAL for incorrect arguments, -ENOENT on lookup miss, 0 on lookup hit + */ +int +rte_lpm6_lookup(const struct rte_lpm6 *lpm, uint8_t *ip, uint8_t *next_hop); + +/** + * Lookup multiple IP addresses in an LPM table. + * + * @param lpm + * LPM object handle + * @param ips + * Array of IPs to be looked up in the LPM table + * @param next_hops + * Next hop of the most specific rule found for IP (valid on lookup hit only). + * This is an array of two byte values. The next hop will be stored on + * each position on success; otherwise the position will be set to -1. + * @param n + * Number of elements in ips (and next_hops) array to lookup. + * @return + * -EINVAL for incorrect arguments, otherwise 0 + */ +int +rte_lpm6_lookup_bulk_func(const struct rte_lpm6 *lpm, + uint8_t ips[][RTE_LPM6_IPV6_ADDR_SIZE], + int16_t * next_hops, unsigned n); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_lpm/rte_lpm_neon.h b/lib/librte_lpm/rte_lpm_neon.h new file mode 100644 index 00000000..7c643159 --- /dev/null +++ b/lib/librte_lpm/rte_lpm_neon.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Cavium Networks. All rights reserved. + * All rights reserved. + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Derived rte_lpm_lookupx4 implementation from lib/librte_lpm/rte_lpm_sse.h + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium Networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LPM_NEON_H_ +#define _RTE_LPM_NEON_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + uint32x4_t i24; + rte_xmm_t i8; + uint32_t tbl[4]; + uint64_t idx, pt, pt2; + const uint32_t *ptbl; + + const uint32_t mask = UINT8_MAX; + const int32x4_t mask8 = vdupq_n_s32(mask); + + /* + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries + * as one 64-bit value (0x0300000003000000). + */ + const uint64_t mask_xv = + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); + + /* + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries + * as one 64-bit value (0x0100000001000000). + */ + const uint64_t mask_v = + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); + + /* get 4 indexes for tbl24[]. */ + i24 = vshrq_n_u32((uint32x4_t)ip, CHAR_BIT); + + /* extract values from tbl24[] */ + idx = vgetq_lane_u64((uint64x2_t)i24, 0); + + ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[0] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32]; + tbl[1] = *ptbl; + + idx = vgetq_lane_u64((uint64x2_t)i24, 1); + + ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[2] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32]; + tbl[3] = *ptbl; + + /* get 4 indexes for tbl8[]. */ + i8.x = vandq_s32(ip, mask8); + + pt = (uint64_t)tbl[0] | + (uint64_t)tbl[1] << 32; + pt2 = (uint64_t)tbl[2] | + (uint64_t)tbl[3] << 32; + + /* search successfully finished for all 4 IP addresses. */ + if (likely((pt & mask_xv) == mask_v) && + likely((pt2 & mask_xv) == mask_v)) { + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; + return; + } + + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[0] = i8.u32[0] + + (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; + tbl[0] = *ptbl; + } + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[1] = i8.u32[1] + + (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; + tbl[1] = *ptbl; + } + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[2] = i8.u32[2] + + (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; + tbl[2] = *ptbl; + } + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[3] = i8.u32[3] + + (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; + tbl[3] = *ptbl; + } + + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_NEON_H_ */ diff --git a/lib/librte_lpm/rte_lpm_sse.h b/lib/librte_lpm/rte_lpm_sse.h new file mode 100644 index 00000000..da830995 --- /dev/null +++ b/lib/librte_lpm/rte_lpm_sse.h @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LPM_SSE_H_ +#define _RTE_LPM_SSE_H_ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline void +rte_lpm_lookupx4(const struct rte_lpm *lpm, xmm_t ip, uint32_t hop[4], + uint32_t defv) +{ + __m128i i24; + rte_xmm_t i8; + uint32_t tbl[4]; + uint64_t idx, pt, pt2; + const uint32_t *ptbl; + + const __m128i mask8 = + _mm_set_epi32(UINT8_MAX, UINT8_MAX, UINT8_MAX, UINT8_MAX); + + /* + * RTE_LPM_VALID_EXT_ENTRY_BITMASK for 2 LPM entries + * as one 64-bit value (0x0300000003000000). + */ + const uint64_t mask_xv = + ((uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK | + (uint64_t)RTE_LPM_VALID_EXT_ENTRY_BITMASK << 32); + + /* + * RTE_LPM_LOOKUP_SUCCESS for 2 LPM entries + * as one 64-bit value (0x0100000001000000). + */ + const uint64_t mask_v = + ((uint64_t)RTE_LPM_LOOKUP_SUCCESS | + (uint64_t)RTE_LPM_LOOKUP_SUCCESS << 32); + + /* get 4 indexes for tbl24[]. */ + i24 = _mm_srli_epi32(ip, CHAR_BIT); + + /* extract values from tbl24[] */ + idx = _mm_cvtsi128_si64(i24); + i24 = _mm_srli_si128(i24, sizeof(uint64_t)); + + ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[0] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32]; + tbl[1] = *ptbl; + + idx = _mm_cvtsi128_si64(i24); + + ptbl = (const uint32_t *)&lpm->tbl24[(uint32_t)idx]; + tbl[2] = *ptbl; + ptbl = (const uint32_t *)&lpm->tbl24[idx >> 32]; + tbl[3] = *ptbl; + + /* get 4 indexes for tbl8[]. */ + i8.x = _mm_and_si128(ip, mask8); + + pt = (uint64_t)tbl[0] | + (uint64_t)tbl[1] << 32; + pt2 = (uint64_t)tbl[2] | + (uint64_t)tbl[3] << 32; + + /* search successfully finished for all 4 IP addresses. */ + if (likely((pt & mask_xv) == mask_v) && + likely((pt2 & mask_xv) == mask_v)) { + *(uint64_t *)hop = pt & RTE_LPM_MASKX4_RES; + *(uint64_t *)(hop + 2) = pt2 & RTE_LPM_MASKX4_RES; + return; + } + + if (unlikely((pt & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[0] = i8.u32[0] + + (uint8_t)tbl[0] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[0]]; + tbl[0] = *ptbl; + } + if (unlikely((pt >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[1] = i8.u32[1] + + (uint8_t)tbl[1] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[1]]; + tbl[1] = *ptbl; + } + if (unlikely((pt2 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[2] = i8.u32[2] + + (uint8_t)tbl[2] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[2]]; + tbl[2] = *ptbl; + } + if (unlikely((pt2 >> 32 & RTE_LPM_VALID_EXT_ENTRY_BITMASK) == + RTE_LPM_VALID_EXT_ENTRY_BITMASK)) { + i8.u32[3] = i8.u32[3] + + (uint8_t)tbl[3] * RTE_LPM_TBL8_GROUP_NUM_ENTRIES; + ptbl = (const uint32_t *)&lpm->tbl8[i8.u32[3]]; + tbl[3] = *ptbl; + } + + hop[0] = (tbl[0] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[0] & 0x00FFFFFF : defv; + hop[1] = (tbl[1] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[1] & 0x00FFFFFF : defv; + hop[2] = (tbl[2] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[2] & 0x00FFFFFF : defv; + hop[3] = (tbl[3] & RTE_LPM_LOOKUP_SUCCESS) ? tbl[3] & 0x00FFFFFF : defv; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LPM_SSE_H_ */ diff --git a/lib/librte_lpm/rte_lpm_version.map b/lib/librte_lpm/rte_lpm_version.map new file mode 100644 index 00000000..239b371e --- /dev/null +++ b/lib/librte_lpm/rte_lpm_version.map @@ -0,0 +1,36 @@ +DPDK_2.0 { + global: + + rte_lpm_add; + rte_lpm_create; + rte_lpm_delete; + rte_lpm_delete_all; + rte_lpm_find_existing; + rte_lpm_free; + rte_lpm_is_rule_present; + rte_lpm6_add; + rte_lpm6_create; + rte_lpm6_delete; + rte_lpm6_delete_all; + rte_lpm6_delete_bulk_func; + rte_lpm6_find_existing; + rte_lpm6_free; + rte_lpm6_is_rule_present; + rte_lpm6_lookup; + rte_lpm6_lookup_bulk_func; + + local: *; +}; + +DPDK_16.04 { + global: + + rte_lpm_add; + rte_lpm_find_existing; + rte_lpm_create; + rte_lpm_free; + rte_lpm_is_rule_present; + rte_lpm_delete; + rte_lpm_delete_all; + +} DPDK_2.0; diff --git a/lib/librte_mbuf/Makefile b/lib/librte_mbuf/Makefile new file mode 100644 index 00000000..8d62b0d3 --- /dev/null +++ b/lib/librte_mbuf/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_mbuf.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_mbuf_version.map + +LIBABIVER := 2 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_MBUF) := rte_mbuf.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_MBUF)-include := rte_mbuf.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_MBUF) += lib/librte_eal lib/librte_mempool + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_mbuf/rte_mbuf.c b/lib/librte_mbuf/rte_mbuf.c new file mode 100644 index 00000000..dc0467c9 --- /dev/null +++ b/lib/librte_mbuf/rte_mbuf.c @@ -0,0 +1,288 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * ctrlmbuf constructor, given as a callback function to + * rte_mempool_create() + */ +void +rte_ctrlmbuf_init(struct rte_mempool *mp, + __attribute__((unused)) void *opaque_arg, + void *_m, + __attribute__((unused)) unsigned i) +{ + struct rte_mbuf *m = _m; + rte_pktmbuf_init(mp, opaque_arg, _m, i); + m->ol_flags |= CTRL_MBUF_FLAG; +} + +/* + * pktmbuf pool constructor, given as a callback function to + * rte_mempool_create() + */ +void +rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg) +{ + struct rte_pktmbuf_pool_private *user_mbp_priv, *mbp_priv; + struct rte_pktmbuf_pool_private default_mbp_priv; + uint16_t roomsz; + + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf)); + + /* if no structure is provided, assume no mbuf private area */ + user_mbp_priv = opaque_arg; + if (user_mbp_priv == NULL) { + default_mbp_priv.mbuf_priv_size = 0; + if (mp->elt_size > sizeof(struct rte_mbuf)) + roomsz = mp->elt_size - sizeof(struct rte_mbuf); + else + roomsz = 0; + default_mbp_priv.mbuf_data_room_size = roomsz; + user_mbp_priv = &default_mbp_priv; + } + + RTE_MBUF_ASSERT(mp->elt_size >= sizeof(struct rte_mbuf) + + user_mbp_priv->mbuf_data_room_size + + user_mbp_priv->mbuf_priv_size); + + mbp_priv = rte_mempool_get_priv(mp); + memcpy(mbp_priv, user_mbp_priv, sizeof(*mbp_priv)); +} + +/* + * pktmbuf constructor, given as a callback function to + * rte_mempool_create(). + * Set the fields of a packet mbuf to their default values. + */ +void +rte_pktmbuf_init(struct rte_mempool *mp, + __attribute__((unused)) void *opaque_arg, + void *_m, + __attribute__((unused)) unsigned i) +{ + struct rte_mbuf *m = _m; + uint32_t mbuf_size, buf_len, priv_size; + + priv_size = rte_pktmbuf_priv_size(mp); + mbuf_size = sizeof(struct rte_mbuf) + priv_size; + buf_len = rte_pktmbuf_data_room_size(mp); + + RTE_MBUF_ASSERT(RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) == priv_size); + RTE_MBUF_ASSERT(mp->elt_size >= mbuf_size); + RTE_MBUF_ASSERT(buf_len <= UINT16_MAX); + + memset(m, 0, mp->elt_size); + + /* start of buffer is after mbuf structure and priv data */ + m->priv_size = priv_size; + m->buf_addr = (char *)m + mbuf_size; + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_len = (uint16_t)buf_len; + + /* keep some headroom between start of buffer and data */ + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + + /* init some constant fields */ + m->pool = mp; + m->nb_segs = 1; + m->port = 0xff; +} + +/* helper to create a mbuf pool */ +struct rte_mempool * +rte_pktmbuf_pool_create(const char *name, unsigned n, + unsigned cache_size, uint16_t priv_size, uint16_t data_room_size, + int socket_id) +{ + struct rte_pktmbuf_pool_private mbp_priv; + unsigned elt_size; + + if (RTE_ALIGN(priv_size, RTE_MBUF_PRIV_ALIGN) != priv_size) { + RTE_LOG(ERR, MBUF, "mbuf priv_size=%u is not aligned\n", + priv_size); + rte_errno = EINVAL; + return NULL; + } + elt_size = sizeof(struct rte_mbuf) + (unsigned)priv_size + + (unsigned)data_room_size; + mbp_priv.mbuf_data_room_size = data_room_size; + mbp_priv.mbuf_priv_size = priv_size; + + return rte_mempool_create(name, n, elt_size, + cache_size, sizeof(struct rte_pktmbuf_pool_private), + rte_pktmbuf_pool_init, &mbp_priv, rte_pktmbuf_init, NULL, + socket_id, 0); +} + +/* do some sanity checks on a mbuf: panic if it fails */ +void +rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header) +{ + const struct rte_mbuf *m_seg; + unsigned nb_segs; + + if (m == NULL) + rte_panic("mbuf is NULL\n"); + + /* generic checks */ + if (m->pool == NULL) + rte_panic("bad mbuf pool\n"); + if (m->buf_physaddr == 0) + rte_panic("bad phys addr\n"); + if (m->buf_addr == NULL) + rte_panic("bad virt addr\n"); + + uint16_t cnt = rte_mbuf_refcnt_read(m); + if ((cnt == 0) || (cnt == UINT16_MAX)) + rte_panic("bad ref cnt\n"); + + /* nothing to check for sub-segments */ + if (is_header == 0) + return; + + nb_segs = m->nb_segs; + m_seg = m; + while (m_seg && nb_segs != 0) { + m_seg = m_seg->next; + nb_segs--; + } + if (nb_segs != 0) + rte_panic("bad nb_segs\n"); +} + +/* dump a mbuf on console */ +void +rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len) +{ + unsigned int len; + unsigned nb_segs; + + __rte_mbuf_sanity_check(m, 1); + + fprintf(f, "dump mbuf at 0x%p, phys=%"PRIx64", buf_len=%u\n", + m, (uint64_t)m->buf_physaddr, (unsigned)m->buf_len); + fprintf(f, " pkt_len=%"PRIu32", ol_flags=%"PRIx64", nb_segs=%u, " + "in_port=%u\n", m->pkt_len, m->ol_flags, + (unsigned)m->nb_segs, (unsigned)m->port); + nb_segs = m->nb_segs; + + while (m && nb_segs != 0) { + __rte_mbuf_sanity_check(m, 0); + + fprintf(f, " segment at 0x%p, data=0x%p, data_len=%u\n", + m, rte_pktmbuf_mtod(m, void *), (unsigned)m->data_len); + len = dump_len; + if (len > m->data_len) + len = m->data_len; + if (len != 0) + rte_hexdump(f, NULL, rte_pktmbuf_mtod(m, void *), len); + dump_len -= len; + m = m->next; + nb_segs --; + } +} + +/* + * Get the name of a RX offload flag. Must be kept synchronized with flag + * definitions in rte_mbuf.h. + */ +const char *rte_get_rx_ol_flag_name(uint64_t mask) +{ + switch (mask) { + case PKT_RX_VLAN_PKT: return "PKT_RX_VLAN_PKT"; + case PKT_RX_RSS_HASH: return "PKT_RX_RSS_HASH"; + case PKT_RX_FDIR: return "PKT_RX_FDIR"; + case PKT_RX_L4_CKSUM_BAD: return "PKT_RX_L4_CKSUM_BAD"; + case PKT_RX_IP_CKSUM_BAD: return "PKT_RX_IP_CKSUM_BAD"; + case PKT_RX_EIP_CKSUM_BAD: return "PKT_RX_EIP_CKSUM_BAD"; + /* case PKT_RX_OVERSIZE: return "PKT_RX_OVERSIZE"; */ + /* case PKT_RX_HBUF_OVERFLOW: return "PKT_RX_HBUF_OVERFLOW"; */ + /* case PKT_RX_RECIP_ERR: return "PKT_RX_RECIP_ERR"; */ + /* case PKT_RX_MAC_ERR: return "PKT_RX_MAC_ERR"; */ + case PKT_RX_IEEE1588_PTP: return "PKT_RX_IEEE1588_PTP"; + case PKT_RX_IEEE1588_TMST: return "PKT_RX_IEEE1588_TMST"; + default: return NULL; + } +} + +/* + * Get the name of a TX offload flag. Must be kept synchronized with flag + * definitions in rte_mbuf.h. + */ +const char *rte_get_tx_ol_flag_name(uint64_t mask) +{ + switch (mask) { + case PKT_TX_VLAN_PKT: return "PKT_TX_VLAN_PKT"; + case PKT_TX_IP_CKSUM: return "PKT_TX_IP_CKSUM"; + case PKT_TX_TCP_CKSUM: return "PKT_TX_TCP_CKSUM"; + case PKT_TX_SCTP_CKSUM: return "PKT_TX_SCTP_CKSUM"; + case PKT_TX_UDP_CKSUM: return "PKT_TX_UDP_CKSUM"; + case PKT_TX_IEEE1588_TMST: return "PKT_TX_IEEE1588_TMST"; + case PKT_TX_TCP_SEG: return "PKT_TX_TCP_SEG"; + case PKT_TX_IPV4: return "PKT_TX_IPV4"; + case PKT_TX_IPV6: return "PKT_TX_IPV6"; + case PKT_TX_OUTER_IP_CKSUM: return "PKT_TX_OUTER_IP_CKSUM"; + case PKT_TX_OUTER_IPV4: return "PKT_TX_OUTER_IPV4"; + case PKT_TX_OUTER_IPV6: return "PKT_TX_OUTER_IPV6"; + default: return NULL; + } +} diff --git a/lib/librte_mbuf/rte_mbuf.h b/lib/librte_mbuf/rte_mbuf.h new file mode 100644 index 00000000..75a227d8 --- /dev/null +++ b/lib/librte_mbuf/rte_mbuf.h @@ -0,0 +1,1946 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MBUF_H_ +#define _RTE_MBUF_H_ + +/** + * @file + * RTE Mbuf + * + * The mbuf library provides the ability to create and destroy buffers + * that may be used by the RTE application to store message + * buffers. The message buffers are stored in a mempool, using the + * RTE mempool library. + * + * This library provide an API to allocate/free packet mbufs, which are + * used to carry network packets. + * + * To understand the concepts of packet buffers or mbufs, you + * should read "TCP/IP Illustrated, Volume 2: The Implementation, + * Addison-Wesley, 1995, ISBN 0-201-63354-X from Richard Stevens" + * http://www.kohala.com/start/tcpipiv2.html + */ + +#include +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* deprecated options */ +#pragma GCC poison RTE_MBUF_SCATTER_GATHER +#pragma GCC poison RTE_MBUF_REFCNT + +/* + * Packet Offload Features Flags. It also carry packet type information. + * Critical resources. Both rx/tx shared these bits. Be cautious on any change + * + * - RX flags start at bit position zero, and get added to the left of previous + * flags. + * - The most-significant 3 bits are reserved for generic mbuf flags + * - TX flags therefore start at bit position 60 (i.e. 63-3), and new flags get + * added to the right of the previously defined flags i.e. they should count + * downwards, not upwards. + * + * Keep these flags synchronized with rte_get_rx_ol_flag_name() and + * rte_get_tx_ol_flag_name(). + */ +#define PKT_RX_VLAN_PKT (1ULL << 0) /**< RX packet is a 802.1q VLAN packet. */ +#define PKT_RX_RSS_HASH (1ULL << 1) /**< RX packet with RSS hash result. */ +#define PKT_RX_FDIR (1ULL << 2) /**< RX packet with FDIR match indicate. */ +#define PKT_RX_L4_CKSUM_BAD (1ULL << 3) /**< L4 cksum of RX pkt. is not OK. */ +#define PKT_RX_IP_CKSUM_BAD (1ULL << 4) /**< IP cksum of RX pkt. is not OK. */ +#define PKT_RX_EIP_CKSUM_BAD (1ULL << 5) /**< External IP header checksum error. */ +#define PKT_RX_OVERSIZE (0ULL << 0) /**< Num of desc of an RX pkt oversize. */ +#define PKT_RX_HBUF_OVERFLOW (0ULL << 0) /**< Header buffer overflow. */ +#define PKT_RX_RECIP_ERR (0ULL << 0) /**< Hardware processing error. */ +#define PKT_RX_MAC_ERR (0ULL << 0) /**< MAC error. */ +#define PKT_RX_IEEE1588_PTP (1ULL << 9) /**< RX IEEE1588 L2 Ethernet PT Packet. */ +#define PKT_RX_IEEE1588_TMST (1ULL << 10) /**< RX IEEE1588 L2/L4 timestamped packet.*/ +#define PKT_RX_FDIR_ID (1ULL << 13) /**< FD id reported if FDIR match. */ +#define PKT_RX_FDIR_FLX (1ULL << 14) /**< Flexible bytes reported if FDIR match. */ +#define PKT_RX_QINQ_PKT (1ULL << 15) /**< RX packet with double VLAN stripped. */ +/* add new RX flags here */ + +/* add new TX flags here */ + +/** + * Second VLAN insertion (QinQ) flag. + */ +#define PKT_TX_QINQ_PKT (1ULL << 49) /**< TX packet with double VLAN inserted. */ + +/** + * TCP segmentation offload. To enable this offload feature for a + * packet to be transmitted on hardware supporting TSO: + * - set the PKT_TX_TCP_SEG flag in mbuf->ol_flags (this flag implies + * PKT_TX_TCP_CKSUM) + * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 + * - if it's IPv4, set the PKT_TX_IP_CKSUM flag and write the IP checksum + * to 0 in the packet + * - fill the mbuf offload information: l2_len, l3_len, l4_len, tso_segsz + * - calculate the pseudo header checksum without taking ip_len in account, + * and set it in the TCP header. Refer to rte_ipv4_phdr_cksum() and + * rte_ipv6_phdr_cksum() that can be used as helpers. + */ +#define PKT_TX_TCP_SEG (1ULL << 50) + +#define PKT_TX_IEEE1588_TMST (1ULL << 51) /**< TX IEEE1588 packet to timestamp. */ + +/** + * Bits 52+53 used for L4 packet type with checksum enabled: 00: Reserved, + * 01: TCP checksum, 10: SCTP checksum, 11: UDP checksum. To use hardware + * L4 checksum offload, the user needs to: + * - fill l2_len and l3_len in mbuf + * - set the flags PKT_TX_TCP_CKSUM, PKT_TX_SCTP_CKSUM or PKT_TX_UDP_CKSUM + * - set the flag PKT_TX_IPV4 or PKT_TX_IPV6 + * - calculate the pseudo header checksum and set it in the L4 header (only + * for TCP or UDP). See rte_ipv4_phdr_cksum() and rte_ipv6_phdr_cksum(). + * For SCTP, set the crc field to 0. + */ +#define PKT_TX_L4_NO_CKSUM (0ULL << 52) /**< Disable L4 cksum of TX pkt. */ +#define PKT_TX_TCP_CKSUM (1ULL << 52) /**< TCP cksum of TX pkt. computed by NIC. */ +#define PKT_TX_SCTP_CKSUM (2ULL << 52) /**< SCTP cksum of TX pkt. computed by NIC. */ +#define PKT_TX_UDP_CKSUM (3ULL << 52) /**< UDP cksum of TX pkt. computed by NIC. */ +#define PKT_TX_L4_MASK (3ULL << 52) /**< Mask for L4 cksum offload request. */ + +/** + * Offload the IP checksum in the hardware. The flag PKT_TX_IPV4 should + * also be set by the application, although a PMD will only check + * PKT_TX_IP_CKSUM. + * - set the IP checksum field in the packet to 0 + * - fill the mbuf offload information: l2_len, l3_len + */ +#define PKT_TX_IP_CKSUM (1ULL << 54) + +/** + * Packet is IPv4. This flag must be set when using any offload feature + * (TSO, L3 or L4 checksum) to tell the NIC that the packet is an IPv4 + * packet. If the packet is a tunneled packet, this flag is related to + * the inner headers. + */ +#define PKT_TX_IPV4 (1ULL << 55) + +/** + * Packet is IPv6. This flag must be set when using an offload feature + * (TSO or L4 checksum) to tell the NIC that the packet is an IPv6 + * packet. If the packet is a tunneled packet, this flag is related to + * the inner headers. + */ +#define PKT_TX_IPV6 (1ULL << 56) + +#define PKT_TX_VLAN_PKT (1ULL << 57) /**< TX packet is a 802.1q VLAN packet. */ + +/** + * Offload the IP checksum of an external header in the hardware. The + * flag PKT_TX_OUTER_IPV4 should also be set by the application, alto ugh + * a PMD will only check PKT_TX_IP_CKSUM. The IP checksum field in the + * packet must be set to 0. + * - set the outer IP checksum field in the packet to 0 + * - fill the mbuf offload information: outer_l2_len, outer_l3_len + */ +#define PKT_TX_OUTER_IP_CKSUM (1ULL << 58) + +/** + * Packet outer header is IPv4. This flag must be set when using any + * outer offload feature (L3 or L4 checksum) to tell the NIC that the + * outer header of the tunneled packet is an IPv4 packet. + */ +#define PKT_TX_OUTER_IPV4 (1ULL << 59) + +/** + * Packet outer header is IPv6. This flag must be set when using any + * outer offload feature (L4 checksum) to tell the NIC that the outer + * header of the tunneled packet is an IPv6 packet. + */ +#define PKT_TX_OUTER_IPV6 (1ULL << 60) + +#define __RESERVED (1ULL << 61) /**< reserved for future mbuf use */ + +#define IND_ATTACHED_MBUF (1ULL << 62) /**< Indirect attached mbuf */ + +/* Use final bit of flags to indicate a control mbuf */ +#define CTRL_MBUF_FLAG (1ULL << 63) /**< Mbuf contains control data */ + +/* + * 32 bits are divided into several fields to mark packet types. Note that + * each field is indexical. + * - Bit 3:0 is for L2 types. + * - Bit 7:4 is for L3 or outer L3 (for tunneling case) types. + * - Bit 11:8 is for L4 or outer L4 (for tunneling case) types. + * - Bit 15:12 is for tunnel types. + * - Bit 19:16 is for inner L2 types. + * - Bit 23:20 is for inner L3 types. + * - Bit 27:24 is for inner L4 types. + * - Bit 31:28 is reserved. + * + * To be compatible with Vector PMD, RTE_PTYPE_L3_IPV4, RTE_PTYPE_L3_IPV4_EXT, + * RTE_PTYPE_L3_IPV6, RTE_PTYPE_L3_IPV6_EXT, RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP + * and RTE_PTYPE_L4_SCTP should be kept as below in a contiguous 7 bits. + * + * Note that L3 types values are selected for checking IPV4/IPV6 header from + * performance point of view. Reading annotations of RTE_ETH_IS_IPV4_HDR and + * RTE_ETH_IS_IPV6_HDR is needed for any future changes of L3 type values. + * + * Note that the packet types of the same packet recognized by different + * hardware may be different, as different hardware may have different + * capability of packet type recognition. + * + * examples: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=0x29 + * | 'version'=6, 'next header'=0x3A + * | 'ICMPv6 header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV4_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_IP | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_ICMP. + * + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x2F + * | 'GRE header' + * | 'version'=6, 'next header'=0x11 + * | 'UDP header'> + * will be recognized on i40e hardware as packet type combination of, + * RTE_PTYPE_L2_ETHER | + * RTE_PTYPE_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_TUNNEL_GRENAT | + * RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN | + * RTE_PTYPE_INNER_L4_UDP. + */ +#define RTE_PTYPE_UNKNOWN 0x00000000 +/** + * Ethernet packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=[0x0800|0x86DD]> + */ +#define RTE_PTYPE_L2_ETHER 0x00000001 +/** + * Ethernet packet type for time sync. + * + * Packet format: + * <'ether type'=0x88F7> + */ +#define RTE_PTYPE_L2_ETHER_TIMESYNC 0x00000002 +/** + * ARP (Address Resolution Protocol) packet type. + * + * Packet format: + * <'ether type'=0x0806> + */ +#define RTE_PTYPE_L2_ETHER_ARP 0x00000003 +/** + * LLDP (Link Layer Discovery Protocol) packet type. + * + * Packet format: + * <'ether type'=0x88CC> + */ +#define RTE_PTYPE_L2_ETHER_LLDP 0x00000004 +/** + * Mask of layer 2 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L2_MASK 0x0000000f +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * header option. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_L3_IPV4 0x00000010 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and contains header + * options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_L3_IPV4_EXT 0x00000030 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and does not contain any + * extension header. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_L3_IPV6 0x00000040 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * header options. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_L3_IPV4_EXT_UNKNOWN 0x00000090 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and contains extension + * headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_L3_IPV6_EXT 0x000000c0 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for outer packet for tunneling cases, and may or maynot contain + * extension headers. + * + * Packet format: + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_L3_IPV6_EXT_UNKNOWN 0x000000e0 +/** + * Mask of layer 3 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L3_MASK 0x000000f0 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_L4_TCP 0x00000100 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_L4_UDP 0x00000200 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, which can be recognized as + * fragmented. A fragmented packet cannot be recognized as any other L4 types + * (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP, + * RTE_PTYPE_L4_NONFRAG). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_L4_FRAG 0x00000300 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_L4_SCTP 0x00000400 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_L4_ICMP 0x00000500 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for outer packet for tunneling cases. + * + * It refers to those packets of any IP types, while cannot be recognized as + * any of above L4 types (RTE_PTYPE_L4_TCP, RTE_PTYPE_L4_UDP, + * RTE_PTYPE_L4_FRAG, RTE_PTYPE_L4_SCTP, RTE_PTYPE_L4_ICMP). + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_L4_NONFRAG 0x00000600 +/** + * Mask of layer 4 packet types. + * It is used for outer packet for tunneling cases. + */ +#define RTE_PTYPE_L4_MASK 0x00000f00 +/** + * IP (Internet Protocol) in IP (Internet Protocol) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=[4|41]> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[4|41]> + */ +#define RTE_PTYPE_TUNNEL_IP 0x00001000 +/** + * GRE (Generic Routing Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47> + */ +#define RTE_PTYPE_TUNNEL_GRE 0x00002000 +/** + * VXLAN (Virtual eXtensible Local Area Network) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=4798> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=4798> + */ +#define RTE_PTYPE_TUNNEL_VXLAN 0x00003000 +/** + * NVGRE (Network Virtualization using Generic Routing Encapsulation) tunneling + * packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=47 + * | 'protocol type'=0x6558> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=47 + * | 'protocol type'=0x6558'> + */ +#define RTE_PTYPE_TUNNEL_NVGRE 0x00004000 +/** + * GENEVE (Generic Network Virtualization Encapsulation) tunneling packet type. + * + * Packet format: + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17 + * | 'destination port'=6081> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17 + * | 'destination port'=6081> + */ +#define RTE_PTYPE_TUNNEL_GENEVE 0x00005000 +/** + * Tunneling packet type of Teredo, VXLAN (Virtual eXtensible Local Area + * Network) or GRE (Generic Routing Encapsulation) could be recognized as this + * packet type, if they can not be recognized independently as of hardware + * capability. + */ +#define RTE_PTYPE_TUNNEL_GRENAT 0x00006000 +/** + * Mask of tunneling packet types. + */ +#define RTE_PTYPE_TUNNEL_MASK 0x0000f000 +/** + * Ethernet packet type. + * It is used for inner packet type only. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD]> + */ +#define RTE_PTYPE_INNER_L2_ETHER 0x00010000 +/** + * Ethernet packet type with VLAN (Virtual Local Area Network) tag. + * + * Packet format (inner only): + * <'ether type'=[0x800|0x86DD], vlan=[1-4095]> + */ +#define RTE_PTYPE_INNER_L2_ETHER_VLAN 0x00020000 +/** + * Mask of inner layer 2 packet types. + */ +#define RTE_PTYPE_INNER_L2_MASK 0x000f0000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and does not contain any header option. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=5> + */ +#define RTE_PTYPE_INNER_L3_IPV4 0x00100000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and contains header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[6-15], 'options'> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT 0x00200000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and does not contain any extension header. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=0x3B> + */ +#define RTE_PTYPE_INNER_L3_IPV6 0x00300000 +/** + * IP (Internet Protocol) version 4 packet type. + * It is used for inner packet only, and may or maynot contain header options. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'ihl'=[5-15], <'options'>> + */ +#define RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN 0x00400000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and contains extension headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * 'extension headers'> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT 0x00500000 +/** + * IP (Internet Protocol) version 6 packet type. + * It is used for inner packet only, and may or maynot contain extension + * headers. + * + * Packet format (inner only): + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=[0x3B|0x0|0x2B|0x2C|0x32|0x33|0x3C|0x87], + * <'extension headers'>> + */ +#define RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN 0x00600000 +/** + * Mask of inner layer 3 packet types. + */ +#define RTE_PTYPE_INNER_L3_MASK 0x00f00000 +/** + * TCP (Transmission Control Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=6, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=6> + */ +#define RTE_PTYPE_INNER_L4_TCP 0x01000000 +/** + * UDP (User Datagram Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=17, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=17> + */ +#define RTE_PTYPE_INNER_L4_UDP 0x02000000 +/** + * Fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have layer 4 packet. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'MF'=1> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=44> + */ +#define RTE_PTYPE_INNER_L4_FRAG 0x03000000 +/** + * SCTP (Stream Control Transmission Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=132, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=132> + */ +#define RTE_PTYPE_INNER_L4_SCTP 0x04000000 +/** + * ICMP (Internet Control Message Protocol) packet type. + * It is used for inner packet only. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'=1, 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'=1> + */ +#define RTE_PTYPE_INNER_L4_ICMP 0x05000000 +/** + * Non-fragmented IP (Internet Protocol) packet type. + * It is used for inner packet only, and may or maynot have other unknown layer + * 4 packet types. + * + * Packet format (inner only): + * <'ether type'=0x0800 + * | 'version'=4, 'protocol'!=[6|17|132|1], 'MF'=0> + * or, + * <'ether type'=0x86DD + * | 'version'=6, 'next header'!=[6|17|44|132|1]> + */ +#define RTE_PTYPE_INNER_L4_NONFRAG 0x06000000 +/** + * Mask of inner layer 4 packet types. + */ +#define RTE_PTYPE_INNER_L4_MASK 0x0f000000 + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 4 is selected to be used for IPv4 only. Then checking bit 4 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV4_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV4) + +/** + * Check if the (outer) L3 header is IPv4. To avoid comparing IPv4 types one by + * one, bit 6 is selected to be used for IPv4 only. Then checking bit 6 can + * determine if it is an IPV4 packet. + */ +#define RTE_ETH_IS_IPV6_HDR(ptype) ((ptype) & RTE_PTYPE_L3_IPV6) + +/* Check if it is a tunneling packet */ +#define RTE_ETH_IS_TUNNEL_PKT(ptype) ((ptype) & (RTE_PTYPE_TUNNEL_MASK | \ + RTE_PTYPE_INNER_L2_MASK | \ + RTE_PTYPE_INNER_L3_MASK | \ + RTE_PTYPE_INNER_L4_MASK)) + +/** Alignment constraint of mbuf private area. */ +#define RTE_MBUF_PRIV_ALIGN 8 + +/** + * Get the name of a RX offload flag + * + * @param mask + * The mask describing the flag. + * @return + * The name of this flag, or NULL if it's not a valid RX flag. + */ +const char *rte_get_rx_ol_flag_name(uint64_t mask); + +/** + * Get the name of a TX offload flag + * + * @param mask + * The mask describing the flag. Usually only one bit must be set. + * Several bits can be given if they belong to the same mask. + * Ex: PKT_TX_L4_MASK. + * @return + * The name of this flag, or NULL if it's not a valid TX flag. + */ +const char *rte_get_tx_ol_flag_name(uint64_t mask); + +/** + * Some NICs need at least 2KB buffer to RX standard Ethernet frame without + * splitting it into multiple segments. + * So, for mbufs that planned to be involved into RX/TX, the recommended + * minimal buffer length is 2KB + RTE_PKTMBUF_HEADROOM. + */ +#define RTE_MBUF_DEFAULT_DATAROOM 2048 +#define RTE_MBUF_DEFAULT_BUF_SIZE \ + (RTE_MBUF_DEFAULT_DATAROOM + RTE_PKTMBUF_HEADROOM) + +/* define a set of marker types that can be used to refer to set points in the + * mbuf */ +typedef void *MARKER[0]; /**< generic marker for a point in a structure */ +typedef uint8_t MARKER8[0]; /**< generic marker with 1B alignment */ +typedef uint64_t MARKER64[0]; /**< marker that allows us to overwrite 8 bytes + * with a single assignment */ + +/** + * The generic rte_mbuf, containing a packet mbuf. + */ +struct rte_mbuf { + MARKER cacheline0; + + void *buf_addr; /**< Virtual address of segment buffer. */ + phys_addr_t buf_physaddr; /**< Physical address of segment buffer. */ + + uint16_t buf_len; /**< Length of segment buffer. */ + + /* next 6 bytes are initialised on RX descriptor rearm */ + MARKER8 rearm_data; + uint16_t data_off; + + /** + * 16-bit Reference counter. + * It should only be accessed using the following functions: + * rte_mbuf_refcnt_update(), rte_mbuf_refcnt_read(), and + * rte_mbuf_refcnt_set(). The functionality of these functions (atomic, + * or non-atomic) is controlled by the CONFIG_RTE_MBUF_REFCNT_ATOMIC + * config option. + */ + union { + rte_atomic16_t refcnt_atomic; /**< Atomically accessed refcnt */ + uint16_t refcnt; /**< Non-atomically accessed refcnt */ + }; + uint8_t nb_segs; /**< Number of segments. */ + uint8_t port; /**< Input port. */ + + uint64_t ol_flags; /**< Offload features. */ + + /* remaining bytes are set on RX when pulling packet from descriptor */ + MARKER rx_descriptor_fields1; + + /* + * The packet type, which is the combination of outer/inner L2, L3, L4 + * and tunnel types. + */ + union { + uint32_t packet_type; /**< L2/L3/L4 and tunnel information. */ + struct { + uint32_t l2_type:4; /**< (Outer) L2 type. */ + uint32_t l3_type:4; /**< (Outer) L3 type. */ + uint32_t l4_type:4; /**< (Outer) L4 type. */ + uint32_t tun_type:4; /**< Tunnel type. */ + uint32_t inner_l2_type:4; /**< Inner L2 type. */ + uint32_t inner_l3_type:4; /**< Inner L3 type. */ + uint32_t inner_l4_type:4; /**< Inner L4 type. */ + }; + }; + + uint32_t pkt_len; /**< Total pkt len: sum of all segments. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + uint16_t vlan_tci; /**< VLAN Tag Control Identifier (CPU order) */ + + union { + uint32_t rss; /**< RSS hash result if RSS enabled */ + struct { + union { + struct { + uint16_t hash; + uint16_t id; + }; + uint32_t lo; + /**< Second 4 flexible bytes */ + }; + uint32_t hi; + /**< First 4 flexible bytes or FD ID, dependent on + PKT_RX_FDIR_* flag in ol_flags. */ + } fdir; /**< Filter identifier if FDIR enabled */ + struct { + uint32_t lo; + uint32_t hi; + } sched; /**< Hierarchical scheduler */ + uint32_t usr; /**< User defined tags. See rte_distributor_process() */ + } hash; /**< hash information */ + + uint32_t seqn; /**< Sequence number. See also rte_reorder_insert() */ + + uint16_t vlan_tci_outer; /**< Outer VLAN Tag Control Identifier (CPU order) */ + + /* second cache line - fields only used in slow path or on TX */ + MARKER cacheline1 __rte_cache_min_aligned; + + union { + void *userdata; /**< Can be used for external metadata */ + uint64_t udata64; /**< Allow 8-byte userdata on 32-bit */ + }; + + struct rte_mempool *pool; /**< Pool from which mbuf was allocated. */ + struct rte_mbuf *next; /**< Next segment of scattered packet. */ + + /* fields to support TX offloads */ + union { + uint64_t tx_offload; /**< combined for easy fetch */ + struct { + uint64_t l2_len:7; /**< L2 (MAC) Header Length. */ + uint64_t l3_len:9; /**< L3 (IP) Header Length. */ + uint64_t l4_len:8; /**< L4 (TCP/UDP) Header Length. */ + uint64_t tso_segsz:16; /**< TCP TSO segment size */ + + /* fields for TX offloading of tunnels */ + uint64_t outer_l3_len:9; /**< Outer L3 (IP) Hdr Length. */ + uint64_t outer_l2_len:7; /**< Outer L2 (MAC) Hdr Length. */ + + /* uint64_t unused:8; */ + }; + }; + + /** Size of the application private data. In case of an indirect + * mbuf, it stores the direct mbuf private data size. */ + uint16_t priv_size; + + /** Timesync flags for use with IEEE1588. */ + uint16_t timesync; +} __rte_cache_aligned; + +static inline uint16_t rte_pktmbuf_priv_size(struct rte_mempool *mp); + +/** + * Return the DMA address of the beginning of the mbuf data + * + * @param mb + * The pointer to the mbuf. + * @return + * The physical address of the beginning of the mbuf data + */ +static inline phys_addr_t +rte_mbuf_data_dma_addr(const struct rte_mbuf *mb) +{ + return mb->buf_physaddr + mb->data_off; +} + +/** + * Return the default DMA address of the beginning of the mbuf data + * + * This function is used by drivers in their receive function, as it + * returns the location where data should be written by the NIC, taking + * the default headroom in account. + * + * @param mb + * The pointer to the mbuf. + * @return + * The physical address of the beginning of the mbuf data + */ +static inline phys_addr_t +rte_mbuf_data_dma_addr_default(const struct rte_mbuf *mb) +{ + return mb->buf_physaddr + RTE_PKTMBUF_HEADROOM; +} + +/** + * Return the mbuf owning the data buffer address of an indirect mbuf. + * + * @param mi + * The pointer to the indirect mbuf. + * @return + * The address of the direct mbuf corresponding to buffer_addr. + */ +static inline struct rte_mbuf * +rte_mbuf_from_indirect(struct rte_mbuf *mi) +{ + return (struct rte_mbuf *)RTE_PTR_SUB(mi->buf_addr, sizeof(*mi) + mi->priv_size); +} + +/** + * Return the buffer address embedded in the given mbuf. + * + * @param md + * The pointer to the mbuf. + * @return + * The address of the data buffer owned by the mbuf. + */ +static inline char * +rte_mbuf_to_baddr(struct rte_mbuf *md) +{ + char *buffer_addr; + buffer_addr = (char *)md + sizeof(*md) + rte_pktmbuf_priv_size(md->pool); + return buffer_addr; +} + +/** + * Returns TRUE if given mbuf is indirect, or FALSE otherwise. + */ +#define RTE_MBUF_INDIRECT(mb) ((mb)->ol_flags & IND_ATTACHED_MBUF) + +/** + * Returns TRUE if given mbuf is direct, or FALSE otherwise. + */ +#define RTE_MBUF_DIRECT(mb) (!RTE_MBUF_INDIRECT(mb)) + +/** + * Private data in case of pktmbuf pool. + * + * A structure that contains some pktmbuf_pool-specific data that are + * appended after the mempool structure (in private data). + */ +struct rte_pktmbuf_pool_private { + uint16_t mbuf_data_room_size; /**< Size of data space in each mbuf. */ + uint16_t mbuf_priv_size; /**< Size of private area in each mbuf. */ +}; + +#ifdef RTE_LIBRTE_MBUF_DEBUG + +/** check mbuf type in debug mode */ +#define __rte_mbuf_sanity_check(m, is_h) rte_mbuf_sanity_check(m, is_h) + +/** check mbuf type in debug mode if mbuf pointer is not null */ +#define __rte_mbuf_sanity_check_raw(m, is_h) do { \ + if ((m) != NULL) \ + rte_mbuf_sanity_check(m, is_h); \ +} while (0) + +/** MBUF asserts in debug mode */ +#define RTE_MBUF_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} + +#else /* RTE_LIBRTE_MBUF_DEBUG */ + +/** check mbuf type in debug mode */ +#define __rte_mbuf_sanity_check(m, is_h) do { } while (0) + +/** check mbuf type in debug mode if mbuf pointer is not null */ +#define __rte_mbuf_sanity_check_raw(m, is_h) do { } while (0) + +/** MBUF asserts in debug mode */ +#define RTE_MBUF_ASSERT(exp) do { } while (0) + +#endif /* RTE_LIBRTE_MBUF_DEBUG */ + +#ifdef RTE_MBUF_REFCNT_ATOMIC + +/** + * Reads the value of an mbuf's refcnt. + * @param m + * Mbuf to read + * @return + * Reference count number. + */ +static inline uint16_t +rte_mbuf_refcnt_read(const struct rte_mbuf *m) +{ + return (uint16_t)(rte_atomic16_read(&m->refcnt_atomic)); +} + +/** + * Sets an mbuf's refcnt to a defined value. + * @param m + * Mbuf to update + * @param new_value + * Value set + */ +static inline void +rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) +{ + rte_atomic16_set(&m->refcnt_atomic, new_value); +} + +/** + * Adds given value to an mbuf's refcnt and returns its new value. + * @param m + * Mbuf to update + * @param value + * Value to add/subtract + * @return + * Updated value + */ +static inline uint16_t +rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) +{ + /* + * The atomic_add is an expensive operation, so we don't want to + * call it in the case where we know we are the uniq holder of + * this mbuf (i.e. ref_cnt == 1). Otherwise, an atomic + * operation has to be used because concurrent accesses on the + * reference counter can occur. + */ + if (likely(rte_mbuf_refcnt_read(m) == 1)) { + rte_mbuf_refcnt_set(m, 1 + value); + return 1 + value; + } + + return (uint16_t)(rte_atomic16_add_return(&m->refcnt_atomic, value)); +} + +#else /* ! RTE_MBUF_REFCNT_ATOMIC */ + +/** + * Adds given value to an mbuf's refcnt and returns its new value. + */ +static inline uint16_t +rte_mbuf_refcnt_update(struct rte_mbuf *m, int16_t value) +{ + m->refcnt = (uint16_t)(m->refcnt + value); + return m->refcnt; +} + +/** + * Reads the value of an mbuf's refcnt. + */ +static inline uint16_t +rte_mbuf_refcnt_read(const struct rte_mbuf *m) +{ + return m->refcnt; +} + +/** + * Sets an mbuf's refcnt to the defined value. + */ +static inline void +rte_mbuf_refcnt_set(struct rte_mbuf *m, uint16_t new_value) +{ + m->refcnt = new_value; +} + +#endif /* RTE_MBUF_REFCNT_ATOMIC */ + +/** Mbuf prefetch */ +#define RTE_MBUF_PREFETCH_TO_FREE(m) do { \ + if ((m) != NULL) \ + rte_prefetch0(m); \ +} while (0) + + +/** + * Sanity checks on an mbuf. + * + * Check the consistency of the given mbuf. The function will cause a + * panic if corruption is detected. + * + * @param m + * The mbuf to be checked. + * @param is_header + * True if the mbuf is a packet header, false if it is a sub-segment + * of a packet (in this case, some fields like nb_segs are not checked) + */ +void +rte_mbuf_sanity_check(const struct rte_mbuf *m, int is_header); + +/** + * @internal Allocate a new mbuf from mempool *mp*. + * The use of that function is reserved for RTE internal needs. + * Please use rte_pktmbuf_alloc(). + * + * @param mp + * The mempool from which mbuf is allocated. + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +static inline struct rte_mbuf *__rte_mbuf_raw_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m; + void *mb = NULL; + if (rte_mempool_get(mp, &mb) < 0) + return NULL; + m = (struct rte_mbuf *)mb; + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0); + rte_mbuf_refcnt_set(m, 1); + return m; +} + +/** + * @internal Put mbuf back into its original mempool. + * The use of that function is reserved for RTE internal needs. + * Please use rte_pktmbuf_free(). + * + * @param m + * The mbuf to be freed. + */ +static inline void __attribute__((always_inline)) +__rte_mbuf_raw_free(struct rte_mbuf *m) +{ + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(m) == 0); + rte_mempool_put(m->pool, m); +} + +/* Operations on ctrl mbuf */ + +/** + * The control mbuf constructor. + * + * This function initializes some fields in an mbuf structure that are + * not modified by the user once created (mbuf type, origin pool, buffer + * start address, and so on). This function is given as a callback function + * to rte_mempool_create() at pool creation time. + * + * @param mp + * The mempool from which the mbuf is allocated. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + * @param m + * The mbuf to initialize. + * @param i + * The index of the mbuf in the pool table. + */ +void rte_ctrlmbuf_init(struct rte_mempool *mp, void *opaque_arg, + void *m, unsigned i); + +/** + * Allocate a new mbuf (type is ctrl) from mempool *mp*. + * + * This new mbuf is initialized with data pointing to the beginning of + * buffer, and with a length of zero. + * + * @param mp + * The mempool from which the mbuf is allocated. + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +#define rte_ctrlmbuf_alloc(mp) rte_pktmbuf_alloc(mp) + +/** + * Free a control mbuf back into its original mempool. + * + * @param m + * The control mbuf to be freed. + */ +#define rte_ctrlmbuf_free(m) rte_pktmbuf_free(m) + +/** + * A macro that returns the pointer to the carried data. + * + * The value that can be read or assigned. + * + * @param m + * The control mbuf. + */ +#define rte_ctrlmbuf_data(m) ((char *)((m)->buf_addr) + (m)->data_off) + +/** + * A macro that returns the length of the carried data. + * + * The value that can be read or assigned. + * + * @param m + * The control mbuf. + */ +#define rte_ctrlmbuf_len(m) rte_pktmbuf_data_len(m) + +/** + * Tests if an mbuf is a control mbuf + * + * @param m + * The mbuf to be tested + * @return + * - True (1) if the mbuf is a control mbuf + * - False(0) otherwise + */ +static inline int +rte_is_ctrlmbuf(struct rte_mbuf *m) +{ + return !!(m->ol_flags & CTRL_MBUF_FLAG); +} + +/* Operations on pkt mbuf */ + +/** + * The packet mbuf constructor. + * + * This function initializes some fields in the mbuf structure that are + * not modified by the user once created (origin pool, buffer start + * address, and so on). This function is given as a callback function to + * rte_mempool_create() at pool creation time. + * + * @param mp + * The mempool from which mbufs originate. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + * @param m + * The mbuf to initialize. + * @param i + * The index of the mbuf in the pool table. + */ +void rte_pktmbuf_init(struct rte_mempool *mp, void *opaque_arg, + void *m, unsigned i); + + +/** + * A packet mbuf pool constructor. + * + * This function initializes the mempool private data in the case of a + * pktmbuf pool. This private data is needed by the driver. The + * function is given as a callback function to rte_mempool_create() at + * pool creation. It can be extended by the user, for example, to + * provide another packet size. + * + * @param mp + * The mempool from which mbufs originate. + * @param opaque_arg + * A pointer that can be used by the user to retrieve useful information + * for mbuf initialization. This pointer comes from the ``init_arg`` + * parameter of rte_mempool_create(). + */ +void rte_pktmbuf_pool_init(struct rte_mempool *mp, void *opaque_arg); + +/** + * Create a mbuf pool. + * + * This function creates and initializes a packet mbuf pool. It is + * a wrapper to rte_mempool_create() with the proper packet constructor + * and mempool constructor. + * + * @param name + * The name of the mbuf pool. + * @param n + * The number of elements in the mbuf pool. The optimum size (in terms + * of memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param cache_size + * Size of the per-core object cache. See rte_mempool_create() for + * details. + * @param priv_size + * Size of application private are between the rte_mbuf structure + * and the data buffer. This value must be aligned to RTE_MBUF_PRIV_ALIGN. + * @param data_room_size + * Size of data buffer in each mbuf, including RTE_PKTMBUF_HEADROOM. + * @param socket_id + * The socket identifier where the memory should be allocated. The + * value can be *SOCKET_ID_ANY* if there is no NUMA constraint for the + * reserved zone. + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large, or priv_size is not aligned. + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_pktmbuf_pool_create(const char *name, unsigned n, + unsigned cache_size, uint16_t priv_size, uint16_t data_room_size, + int socket_id); + +/** + * Get the data room size of mbufs stored in a pktmbuf_pool + * + * The data room size is the amount of data that can be stored in a + * mbuf including the headroom (RTE_PKTMBUF_HEADROOM). + * + * @param mp + * The packet mbuf pool. + * @return + * The data room size of mbufs stored in this mempool. + */ +static inline uint16_t +rte_pktmbuf_data_room_size(struct rte_mempool *mp) +{ + struct rte_pktmbuf_pool_private *mbp_priv; + + mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp); + return mbp_priv->mbuf_data_room_size; +} + +/** + * Get the application private size of mbufs stored in a pktmbuf_pool + * + * The private size of mbuf is a zone located between the rte_mbuf + * structure and the data buffer where an application can store data + * associated to a packet. + * + * @param mp + * The packet mbuf pool. + * @return + * The private size of mbufs stored in this mempool. + */ +static inline uint16_t +rte_pktmbuf_priv_size(struct rte_mempool *mp) +{ + struct rte_pktmbuf_pool_private *mbp_priv; + + mbp_priv = (struct rte_pktmbuf_pool_private *)rte_mempool_get_priv(mp); + return mbp_priv->mbuf_priv_size; +} + +/** + * Reset the fields of a packet mbuf to their default values. + * + * The given mbuf must have only one segment. + * + * @param m + * The packet mbuf to be resetted. + */ +static inline void rte_pktmbuf_reset(struct rte_mbuf *m) +{ + m->next = NULL; + m->pkt_len = 0; + m->tx_offload = 0; + m->vlan_tci = 0; + m->vlan_tci_outer = 0; + m->nb_segs = 1; + m->port = 0xff; + + m->ol_flags = 0; + m->packet_type = 0; + m->data_off = (RTE_PKTMBUF_HEADROOM <= m->buf_len) ? + RTE_PKTMBUF_HEADROOM : m->buf_len; + + m->data_len = 0; + __rte_mbuf_sanity_check(m, 1); +} + +/** + * Allocate a new mbuf from a mempool. + * + * This new mbuf contains one segment, which has a length of 0. The pointer + * to data is initialized to have some bytes of headroom in the buffer + * (if buffer size allows). + * + * @param mp + * The mempool from which the mbuf is allocated. + * @return + * - The pointer to the new mbuf on success. + * - NULL if allocation failed. + */ +static inline struct rte_mbuf *rte_pktmbuf_alloc(struct rte_mempool *mp) +{ + struct rte_mbuf *m; + if ((m = __rte_mbuf_raw_alloc(mp)) != NULL) + rte_pktmbuf_reset(m); + return m; +} + +/** + * Allocate a bulk of mbufs, initialize refcnt and reset the fields to default + * values. + * + * @param pool + * The mempool from which mbufs are allocated. + * @param mbufs + * Array of pointers to mbufs + * @param count + * Array size + * @return + * - 0: Success + */ +static inline int rte_pktmbuf_alloc_bulk(struct rte_mempool *pool, + struct rte_mbuf **mbufs, unsigned count) +{ + unsigned idx = 0; + int rc; + + rc = rte_mempool_get_bulk(pool, (void **)mbufs, count); + if (unlikely(rc)) + return rc; + + /* To understand duff's device on loop unwinding optimization, see + * https://en.wikipedia.org/wiki/Duff's_device. + * Here while() loop is used rather than do() while{} to avoid extra + * check if count is zero. + */ + switch (count % 4) { + case 0: + while (idx != count) { + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); + rte_mbuf_refcnt_set(mbufs[idx], 1); + rte_pktmbuf_reset(mbufs[idx]); + idx++; + case 3: + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); + rte_mbuf_refcnt_set(mbufs[idx], 1); + rte_pktmbuf_reset(mbufs[idx]); + idx++; + case 2: + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); + rte_mbuf_refcnt_set(mbufs[idx], 1); + rte_pktmbuf_reset(mbufs[idx]); + idx++; + case 1: + RTE_MBUF_ASSERT(rte_mbuf_refcnt_read(mbufs[idx]) == 0); + rte_mbuf_refcnt_set(mbufs[idx], 1); + rte_pktmbuf_reset(mbufs[idx]); + idx++; + } + } + return 0; +} + +/** + * Attach packet mbuf to another packet mbuf. + * + * After attachment we refer the mbuf we attached as 'indirect', + * while mbuf we attached to as 'direct'. + * Right now, not supported: + * - attachment for already indirect mbuf (e.g. - mi has to be direct). + * - mbuf we trying to attach (mi) is used by someone else + * e.g. it's reference counter is greater then 1. + * + * @param mi + * The indirect packet mbuf. + * @param m + * The packet mbuf we're attaching to. + */ +static inline void rte_pktmbuf_attach(struct rte_mbuf *mi, struct rte_mbuf *m) +{ + struct rte_mbuf *md; + + RTE_MBUF_ASSERT(RTE_MBUF_DIRECT(mi) && + rte_mbuf_refcnt_read(mi) == 1); + + /* if m is not direct, get the mbuf that embeds the data */ + if (RTE_MBUF_DIRECT(m)) + md = m; + else + md = rte_mbuf_from_indirect(m); + + rte_mbuf_refcnt_update(md, 1); + mi->priv_size = m->priv_size; + mi->buf_physaddr = m->buf_physaddr; + mi->buf_addr = m->buf_addr; + mi->buf_len = m->buf_len; + + mi->next = m->next; + mi->data_off = m->data_off; + mi->data_len = m->data_len; + mi->port = m->port; + mi->vlan_tci = m->vlan_tci; + mi->vlan_tci_outer = m->vlan_tci_outer; + mi->tx_offload = m->tx_offload; + mi->hash = m->hash; + + mi->next = NULL; + mi->pkt_len = mi->data_len; + mi->nb_segs = 1; + mi->ol_flags = m->ol_flags | IND_ATTACHED_MBUF; + mi->packet_type = m->packet_type; + + __rte_mbuf_sanity_check(mi, 1); + __rte_mbuf_sanity_check(m, 0); +} + +/** + * Detach an indirect packet mbuf. + * + * - restore original mbuf address and length values. + * - reset pktmbuf data and data_len to their default values. + * All other fields of the given packet mbuf will be left intact. + * + * @param m + * The indirect attached packet mbuf. + */ +static inline void rte_pktmbuf_detach(struct rte_mbuf *m) +{ + struct rte_mempool *mp = m->pool; + uint32_t mbuf_size, buf_len, priv_size; + + priv_size = rte_pktmbuf_priv_size(mp); + mbuf_size = sizeof(struct rte_mbuf) + priv_size; + buf_len = rte_pktmbuf_data_room_size(mp); + + m->priv_size = priv_size; + m->buf_addr = (char *)m + mbuf_size; + m->buf_physaddr = rte_mempool_virt2phy(mp, m) + mbuf_size; + m->buf_len = (uint16_t)buf_len; + m->data_off = RTE_MIN(RTE_PKTMBUF_HEADROOM, (uint16_t)m->buf_len); + m->data_len = 0; + m->ol_flags = 0; +} + +static inline struct rte_mbuf* __attribute__((always_inline)) +__rte_pktmbuf_prefree_seg(struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 0); + + if (likely(rte_mbuf_refcnt_update(m, -1) == 0)) { + + /* if this is an indirect mbuf, then + * - detach mbuf + * - free attached mbuf segment + */ + if (RTE_MBUF_INDIRECT(m)) { + struct rte_mbuf *md = rte_mbuf_from_indirect(m); + rte_pktmbuf_detach(m); + if (rte_mbuf_refcnt_update(md, -1) == 0) + __rte_mbuf_raw_free(md); + } + return m; + } + return NULL; +} + +/** + * Free a segment of a packet mbuf into its original mempool. + * + * Free an mbuf, without parsing other segments in case of chained + * buffers. + * + * @param m + * The packet mbuf segment to be freed. + */ +static inline void __attribute__((always_inline)) +rte_pktmbuf_free_seg(struct rte_mbuf *m) +{ + if (likely(NULL != (m = __rte_pktmbuf_prefree_seg(m)))) { + m->next = NULL; + __rte_mbuf_raw_free(m); + } +} + +/** + * Free a packet mbuf back into its original mempool. + * + * Free an mbuf, and all its segments in case of chained buffers. Each + * segment is added back into its original mempool. + * + * @param m + * The packet mbuf to be freed. + */ +static inline void rte_pktmbuf_free(struct rte_mbuf *m) +{ + struct rte_mbuf *m_next; + + __rte_mbuf_sanity_check(m, 1); + + while (m != NULL) { + m_next = m->next; + rte_pktmbuf_free_seg(m); + m = m_next; + } +} + +/** + * Creates a "clone" of the given packet mbuf. + * + * Walks through all segments of the given packet mbuf, and for each of them: + * - Creates a new packet mbuf from the given pool. + * - Attaches newly created mbuf to the segment. + * Then updates pkt_len and nb_segs of the "clone" packet mbuf to match values + * from the original packet mbuf. + * + * @param md + * The packet mbuf to be cloned. + * @param mp + * The mempool from which the "clone" mbufs are allocated. + * @return + * - The pointer to the new "clone" mbuf on success. + * - NULL if allocation fails. + */ +static inline struct rte_mbuf *rte_pktmbuf_clone(struct rte_mbuf *md, + struct rte_mempool *mp) +{ + struct rte_mbuf *mc, *mi, **prev; + uint32_t pktlen; + uint8_t nseg; + + if (unlikely ((mc = rte_pktmbuf_alloc(mp)) == NULL)) + return NULL; + + mi = mc; + prev = &mi->next; + pktlen = md->pkt_len; + nseg = 0; + + do { + nseg++; + rte_pktmbuf_attach(mi, md); + *prev = mi; + prev = &mi->next; + } while ((md = md->next) != NULL && + (mi = rte_pktmbuf_alloc(mp)) != NULL); + + *prev = NULL; + mc->nb_segs = nseg; + mc->pkt_len = pktlen; + + /* Allocation of new indirect segment failed */ + if (unlikely (mi == NULL)) { + rte_pktmbuf_free(mc); + return NULL; + } + + __rte_mbuf_sanity_check(mc, 1); + return mc; +} + +/** + * Adds given value to the refcnt of all packet mbuf segments. + * + * Walks through all segments of given packet mbuf and for each of them + * invokes rte_mbuf_refcnt_update(). + * + * @param m + * The packet mbuf whose refcnt to be updated. + * @param v + * The value to add to the mbuf's segments refcnt. + */ +static inline void rte_pktmbuf_refcnt_update(struct rte_mbuf *m, int16_t v) +{ + __rte_mbuf_sanity_check(m, 1); + + do { + rte_mbuf_refcnt_update(m, v); + } while ((m = m->next) != NULL); +} + +/** + * Get the headroom in a packet mbuf. + * + * @param m + * The packet mbuf. + * @return + * The length of the headroom. + */ +static inline uint16_t rte_pktmbuf_headroom(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return m->data_off; +} + +/** + * Get the tailroom of a packet mbuf. + * + * @param m + * The packet mbuf. + * @return + * The length of the tailroom. + */ +static inline uint16_t rte_pktmbuf_tailroom(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return (uint16_t)(m->buf_len - rte_pktmbuf_headroom(m) - + m->data_len); +} + +/** + * Get the last segment of the packet. + * + * @param m + * The packet mbuf. + * @return + * The last segment of the given mbuf. + */ +static inline struct rte_mbuf *rte_pktmbuf_lastseg(struct rte_mbuf *m) +{ + struct rte_mbuf *m2 = (struct rte_mbuf *)m; + + __rte_mbuf_sanity_check(m, 1); + while (m2->next != NULL) + m2 = m2->next; + return m2; +} + +/** + * A macro that points to an offset into the data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that the first segment is large + * enough to accommodate its data. + * + * @param m + * The packet mbuf. + * @param o + * The offset into the mbuf data. + * @param t + * The type to cast the result into. + */ +#define rte_pktmbuf_mtod_offset(m, t, o) \ + ((t)((char *)(m)->buf_addr + (m)->data_off + (o))) + +/** + * A macro that points to the start of the data in the mbuf. + * + * The returned pointer is cast to type t. Before using this + * function, the user must ensure that the first segment is large + * enough to accommodate its data. + * + * @param m + * The packet mbuf. + * @param t + * The type to cast the result into. + */ +#define rte_pktmbuf_mtod(m, t) rte_pktmbuf_mtod_offset(m, t, 0) + +/** + * A macro that returns the physical address that points to an offset of the + * start of the data in the mbuf + * + * @param m + * The packet mbuf. + * @param o + * The offset into the data to calculate address from. + */ +#define rte_pktmbuf_mtophys_offset(m, o) \ + (phys_addr_t)((m)->buf_physaddr + (m)->data_off + (o)) + +/** + * A macro that returns the physical address that points to the start of the + * data in the mbuf + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_mtophys(m) rte_pktmbuf_mtophys_offset(m, 0) + +/** + * A macro that returns the length of the packet. + * + * The value can be read or assigned. + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_pkt_len(m) ((m)->pkt_len) + +/** + * A macro that returns the length of the segment. + * + * The value can be read or assigned. + * + * @param m + * The packet mbuf. + */ +#define rte_pktmbuf_data_len(m) ((m)->data_len) + +/** + * Prepend len bytes to an mbuf data area. + * + * Returns a pointer to the new + * data start address. If there is not enough headroom in the first + * segment, the function will return NULL, without modifying the mbuf. + * + * @param m + * The pkt mbuf. + * @param len + * The amount of data to prepend (in bytes). + * @return + * A pointer to the start of the newly prepended data, or + * NULL if there is not enough headroom space in the first segment + */ +static inline char *rte_pktmbuf_prepend(struct rte_mbuf *m, + uint16_t len) +{ + __rte_mbuf_sanity_check(m, 1); + + if (unlikely(len > rte_pktmbuf_headroom(m))) + return NULL; + + m->data_off -= len; + m->data_len = (uint16_t)(m->data_len + len); + m->pkt_len = (m->pkt_len + len); + + return (char *)m->buf_addr + m->data_off; +} + +/** + * Append len bytes to an mbuf. + * + * Append len bytes to an mbuf and return a pointer to the start address + * of the added data. If there is not enough tailroom in the last + * segment, the function will return NULL, without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to append (in bytes). + * @return + * A pointer to the start of the newly appended data, or + * NULL if there is not enough tailroom space in the last segment + */ +static inline char *rte_pktmbuf_append(struct rte_mbuf *m, uint16_t len) +{ + void *tail; + struct rte_mbuf *m_last; + + __rte_mbuf_sanity_check(m, 1); + + m_last = rte_pktmbuf_lastseg(m); + if (unlikely(len > rte_pktmbuf_tailroom(m_last))) + return NULL; + + tail = (char *)m_last->buf_addr + m_last->data_off + m_last->data_len; + m_last->data_len = (uint16_t)(m_last->data_len + len); + m->pkt_len = (m->pkt_len + len); + return (char*) tail; +} + +/** + * Remove len bytes at the beginning of an mbuf. + * + * Returns a pointer to the start address of the new data area. If the + * length is greater than the length of the first segment, then the + * function will fail and return NULL, without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to remove (in bytes). + * @return + * A pointer to the new start of the data. + */ +static inline char *rte_pktmbuf_adj(struct rte_mbuf *m, uint16_t len) +{ + __rte_mbuf_sanity_check(m, 1); + + if (unlikely(len > m->data_len)) + return NULL; + + m->data_len = (uint16_t)(m->data_len - len); + m->data_off += len; + m->pkt_len = (m->pkt_len - len); + return (char *)m->buf_addr + m->data_off; +} + +/** + * Remove len bytes of data at the end of the mbuf. + * + * If the length is greater than the length of the last segment, the + * function will fail and return -1 without modifying the mbuf. + * + * @param m + * The packet mbuf. + * @param len + * The amount of data to remove (in bytes). + * @return + * - 0: On success. + * - -1: On error. + */ +static inline int rte_pktmbuf_trim(struct rte_mbuf *m, uint16_t len) +{ + struct rte_mbuf *m_last; + + __rte_mbuf_sanity_check(m, 1); + + m_last = rte_pktmbuf_lastseg(m); + if (unlikely(len > m_last->data_len)) + return -1; + + m_last->data_len = (uint16_t)(m_last->data_len - len); + m->pkt_len = (m->pkt_len - len); + return 0; +} + +/** + * Test if mbuf data is contiguous. + * + * @param m + * The packet mbuf. + * @return + * - 1, if all data is contiguous (one segment). + * - 0, if there is several segments. + */ +static inline int rte_pktmbuf_is_contiguous(const struct rte_mbuf *m) +{ + __rte_mbuf_sanity_check(m, 1); + return !!(m->nb_segs == 1); +} + +/** + * Chain an mbuf to another, thereby creating a segmented packet. + * + * Note: The implementation will do a linear walk over the segments to find + * the tail entry. For cases when there are many segments, it's better to + * chain the entries manually. + * + * @param head + * The head of the mbuf chain (the first packet) + * @param tail + * The mbuf to put last in the chain + * + * @return + * - 0, on success. + * - -EOVERFLOW, if the chain is full (256 entries) + */ +static inline int rte_pktmbuf_chain(struct rte_mbuf *head, struct rte_mbuf *tail) +{ + struct rte_mbuf *cur_tail; + + /* Check for number-of-segments-overflow */ + if (head->nb_segs + tail->nb_segs >= 1 << (sizeof(head->nb_segs) * 8)) + return -EOVERFLOW; + + /* Chain 'tail' onto the old tail */ + cur_tail = rte_pktmbuf_lastseg(head); + cur_tail->next = tail; + + /* accumulate number of segments and total length. */ + head->nb_segs = (uint8_t)(head->nb_segs + tail->nb_segs); + head->pkt_len += tail->pkt_len; + + /* pkt_len is only set in the head */ + tail->pkt_len = tail->data_len; + + return 0; +} + +/** + * Dump an mbuf structure to the console. + * + * Dump all fields for the given packet mbuf and all its associated + * segments (in the case of a chained buffer). + * + * @param f + * A pointer to a file for output + * @param m + * The packet mbuf. + * @param dump_len + * If dump_len != 0, also dump the "dump_len" first data bytes of + * the packet. + */ +void rte_pktmbuf_dump(FILE *f, const struct rte_mbuf *m, unsigned dump_len); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MBUF_H_ */ diff --git a/lib/librte_mbuf/rte_mbuf_version.map b/lib/librte_mbuf/rte_mbuf_version.map new file mode 100644 index 00000000..e10f6bdc --- /dev/null +++ b/lib/librte_mbuf/rte_mbuf_version.map @@ -0,0 +1,20 @@ +DPDK_2.0 { + global: + + rte_ctrlmbuf_init; + rte_get_rx_ol_flag_name; + rte_get_tx_ol_flag_name; + rte_mbuf_sanity_check; + rte_pktmbuf_dump; + rte_pktmbuf_init; + rte_pktmbuf_pool_init; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_pktmbuf_pool_create; + +} DPDK_2.0; diff --git a/lib/librte_mempool/Makefile b/lib/librte_mempool/Makefile new file mode 100644 index 00000000..a6898eff --- /dev/null +++ b/lib/librte_mempool/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_mempool.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_mempool_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_mempool.c +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +SRCS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += rte_dom0_mempool.c +endif +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_MEMPOOL)-include := rte_mempool.h + +DEPDIRS-$(CONFIG_RTE_LIBRTE_MEMPOOL) += lib/librte_eal lib/librte_ring + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_mempool/rte_dom0_mempool.c b/lib/librte_mempool/rte_dom0_mempool.c new file mode 100644 index 00000000..0d6d7504 --- /dev/null +++ b/lib/librte_mempool/rte_dom0_mempool.c @@ -0,0 +1,133 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_mempool.h" + +static void +get_phys_map(void *va, phys_addr_t pa[], uint32_t pg_num, + uint32_t pg_sz, uint32_t memseg_id) +{ + uint32_t i; + uint64_t virt_addr, mfn_id; + struct rte_mem_config *mcfg; + uint32_t page_size = getpagesize(); + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + virt_addr = (uintptr_t) mcfg->memseg[memseg_id].addr; + + for (i = 0; i != pg_num; i++) { + mfn_id = ((uintptr_t)va + i * pg_sz - virt_addr) / RTE_PGSIZE_2M; + pa[i] = mcfg->memseg[memseg_id].mfn[mfn_id] * page_size; + } +} + +/* create the mempool for supporting Dom0 */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name, unsigned elt_num, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags) +{ + struct rte_mempool *mp = NULL; + phys_addr_t *pa; + char *va; + size_t sz; + uint32_t pg_num, pg_shift, pg_sz, total_size; + const struct rte_memzone *mz; + char mz_name[RTE_MEMZONE_NAMESIZE]; + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + + pg_sz = RTE_PGSIZE_2M; + + pg_shift = rte_bsf32(pg_sz); + total_size = rte_mempool_calc_obj_size(elt_size, flags, NULL); + + /* calc max memory size and max number of pages needed. */ + sz = rte_mempool_xmem_size(elt_num, total_size, pg_shift) + + RTE_PGSIZE_2M; + pg_num = sz >> pg_shift; + + /* extract physical mappings of the allocated memory. */ + pa = calloc(pg_num, sizeof (*pa)); + if (pa == NULL) + return mp; + + snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_OBJ_NAME, name); + mz = rte_memzone_reserve(mz_name, sz, socket_id, mz_flags); + if (mz == NULL) { + free(pa); + return mp; + } + + va = (char *)RTE_ALIGN_CEIL((uintptr_t)mz->addr, RTE_PGSIZE_2M); + /* extract physical mappings of the allocated memory. */ + get_phys_map(va, pa, pg_num, pg_sz, mz->memseg_id); + + mp = rte_mempool_xmem_create(name, elt_num, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, va, pa, pg_num, pg_shift); + + free(pa); + + return mp; +} diff --git a/lib/librte_mempool/rte_mempool.c b/lib/librte_mempool/rte_mempool.c new file mode 100644 index 00000000..f8781e17 --- /dev/null +++ b/lib/librte_mempool/rte_mempool.c @@ -0,0 +1,919 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_mempool.h" + +TAILQ_HEAD(rte_mempool_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_mempool_tailq = { + .name = "RTE_MEMPOOL", +}; +EAL_REGISTER_TAILQ(rte_mempool_tailq) + +#define CACHE_FLUSHTHRESH_MULTIPLIER 1.5 +#define CALC_CACHE_FLUSHTHRESH(c) \ + ((typeof(c))((c) * CACHE_FLUSHTHRESH_MULTIPLIER)) + +/* + * return the greatest common divisor between a and b (fast algorithm) + * + */ +static unsigned get_gcd(unsigned a, unsigned b) +{ + unsigned c; + + if (0 == a) + return b; + if (0 == b) + return a; + + if (a < b) { + c = a; + a = b; + b = c; + } + + while (b != 0) { + c = a % b; + a = b; + b = c; + } + + return a; +} + +/* + * Depending on memory configuration, objects addresses are spread + * between channels and ranks in RAM: the pool allocator will add + * padding between objects. This function return the new size of the + * object. + */ +static unsigned optimize_object_size(unsigned obj_size) +{ + unsigned nrank, nchan; + unsigned new_obj_size; + + /* get number of channels */ + nchan = rte_memory_get_nchannel(); + if (nchan == 0) + nchan = 4; + + nrank = rte_memory_get_nrank(); + if (nrank == 0) + nrank = 1; + + /* process new object size */ + new_obj_size = (obj_size + RTE_MEMPOOL_ALIGN_MASK) / RTE_MEMPOOL_ALIGN; + while (get_gcd(new_obj_size, nrank * nchan) != 1) + new_obj_size++; + return new_obj_size * RTE_MEMPOOL_ALIGN; +} + +static void +mempool_add_elem(struct rte_mempool *mp, void *obj, uint32_t obj_idx, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg) +{ + struct rte_mempool_objhdr *hdr; + struct rte_mempool_objtlr *tlr __rte_unused; + + obj = (char *)obj + mp->header_size; + + /* set mempool ptr in header */ + hdr = RTE_PTR_SUB(obj, sizeof(*hdr)); + hdr->mp = mp; + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2; + tlr = __mempool_get_trailer(obj); + tlr->cookie = RTE_MEMPOOL_TRAILER_COOKIE; +#endif + /* call the initializer */ + if (obj_init) + obj_init(mp, obj_init_arg, obj, obj_idx); + + /* enqueue in ring */ + rte_ring_sp_enqueue(mp->ring, obj); +} + +uint32_t +rte_mempool_obj_iter(void *vaddr, uint32_t elt_num, size_t elt_sz, size_t align, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg) +{ + uint32_t i, j, k; + uint32_t pgn, pgf; + uintptr_t end, start, va; + uintptr_t pg_sz; + + pg_sz = (uintptr_t)1 << pg_shift; + va = (uintptr_t)vaddr; + + i = 0; + j = 0; + + while (i != elt_num && j != pg_num) { + + start = RTE_ALIGN_CEIL(va, align); + end = start + elt_sz; + + /* index of the first page for the next element. */ + pgf = (end >> pg_shift) - (start >> pg_shift); + + /* index of the last page for the current element. */ + pgn = ((end - 1) >> pg_shift) - (start >> pg_shift); + pgn += j; + + /* do we have enough space left for the element. */ + if (pgn >= pg_num) + break; + + for (k = j; + k != pgn && + paddr[k] + pg_sz == paddr[k + 1]; + k++) + ; + + /* + * if next pgn chunks of memory physically continuous, + * use it to create next element. + * otherwise, just skip that chunk unused. + */ + if (k == pgn) { + if (obj_iter != NULL) + obj_iter(obj_iter_arg, (void *)start, + (void *)end, i); + va = end; + j += pgf; + i++; + } else { + va = RTE_ALIGN_CEIL((va + 1), pg_sz); + j++; + } + } + + return i; +} + +/* + * Populate mempool with the objects. + */ + +struct mempool_populate_arg { + struct rte_mempool *mp; + rte_mempool_obj_ctor_t *obj_init; + void *obj_init_arg; +}; + +static void +mempool_obj_populate(void *arg, void *start, void *end, uint32_t idx) +{ + struct mempool_populate_arg *pa = arg; + + mempool_add_elem(pa->mp, start, idx, pa->obj_init, pa->obj_init_arg); + pa->mp->elt_va_end = (uintptr_t)end; +} + +static void +mempool_populate(struct rte_mempool *mp, size_t num, size_t align, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg) +{ + uint32_t elt_sz; + struct mempool_populate_arg arg; + + elt_sz = mp->elt_size + mp->header_size + mp->trailer_size; + arg.mp = mp; + arg.obj_init = obj_init; + arg.obj_init_arg = obj_init_arg; + + mp->size = rte_mempool_obj_iter((void *)mp->elt_va_start, + num, elt_sz, align, + mp->elt_pa, mp->pg_num, mp->pg_shift, + mempool_obj_populate, &arg); +} + +uint32_t +rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, + struct rte_mempool_objsz *sz) +{ + struct rte_mempool_objsz lsz; + + sz = (sz != NULL) ? sz : &lsz; + + /* + * In header, we have at least the pointer to the pool, and + * optionaly a 64 bits cookie. + */ + sz->header_size = 0; + sz->header_size += sizeof(struct rte_mempool *); /* ptr to pool */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sz->header_size += sizeof(uint64_t); /* cookie */ +#endif + if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) + sz->header_size = RTE_ALIGN_CEIL(sz->header_size, + RTE_MEMPOOL_ALIGN); + + /* trailer contains the cookie in debug mode */ + sz->trailer_size = 0; +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + sz->trailer_size += sizeof(uint64_t); /* cookie */ +#endif + /* element size is 8 bytes-aligned at least */ + sz->elt_size = RTE_ALIGN_CEIL(elt_size, sizeof(uint64_t)); + + /* expand trailer to next cache line */ + if ((flags & MEMPOOL_F_NO_CACHE_ALIGN) == 0) { + sz->total_size = sz->header_size + sz->elt_size + + sz->trailer_size; + sz->trailer_size += ((RTE_MEMPOOL_ALIGN - + (sz->total_size & RTE_MEMPOOL_ALIGN_MASK)) & + RTE_MEMPOOL_ALIGN_MASK); + } + + /* + * increase trailer to add padding between objects in order to + * spread them across memory channels/ranks + */ + if ((flags & MEMPOOL_F_NO_SPREAD) == 0) { + unsigned new_size; + new_size = optimize_object_size(sz->header_size + sz->elt_size + + sz->trailer_size); + sz->trailer_size = new_size - sz->header_size - sz->elt_size; + } + + if (! rte_eal_has_hugepages()) { + /* + * compute trailer size so that pool elements fit exactly in + * a standard page + */ + int page_size = getpagesize(); + int new_size = page_size - sz->header_size - sz->elt_size; + if (new_size < 0 || (unsigned int)new_size < sz->trailer_size) { + printf("When hugepages are disabled, pool objects " + "can't exceed PAGE_SIZE: %d + %d + %d > %d\n", + sz->header_size, sz->elt_size, sz->trailer_size, + page_size); + return 0; + } + sz->trailer_size = new_size; + } + + /* this is the size of an object, including header and trailer */ + sz->total_size = sz->header_size + sz->elt_size + sz->trailer_size; + + return sz->total_size; +} + + +/* + * Calculate maximum amount of memory required to store given number of objects. + */ +size_t +rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, uint32_t pg_shift) +{ + size_t n, pg_num, pg_sz, sz; + + pg_sz = (size_t)1 << pg_shift; + + if ((n = pg_sz / elt_sz) > 0) { + pg_num = (elt_num + n - 1) / n; + sz = pg_num << pg_shift; + } else { + sz = RTE_ALIGN_CEIL(elt_sz, pg_sz) * elt_num; + } + + return sz; +} + +/* + * Calculate how much memory would be actually required with the + * given memory footprint to store required number of elements. + */ +static void +mempool_lelem_iter(void *arg, __rte_unused void *start, void *end, + __rte_unused uint32_t idx) +{ + *(uintptr_t *)arg = (uintptr_t)end; +} + +ssize_t +rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) +{ + uint32_t n; + uintptr_t va, uv; + size_t pg_sz, usz; + + pg_sz = (size_t)1 << pg_shift; + va = (uintptr_t)vaddr; + uv = va; + + if ((n = rte_mempool_obj_iter(vaddr, elt_num, elt_sz, 1, + paddr, pg_num, pg_shift, mempool_lelem_iter, + &uv)) != elt_num) { + return -(ssize_t)n; + } + + uv = RTE_ALIGN_CEIL(uv, pg_sz); + usz = uv - va; + return usz; +} + +#ifndef RTE_LIBRTE_XEN_DOM0 +/* stub if DOM0 support not configured */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name __rte_unused, + unsigned n __rte_unused, + unsigned elt_size __rte_unused, + unsigned cache_size __rte_unused, + unsigned private_data_size __rte_unused, + rte_mempool_ctor_t *mp_init __rte_unused, + void *mp_init_arg __rte_unused, + rte_mempool_obj_ctor_t *obj_init __rte_unused, + void *obj_init_arg __rte_unused, + int socket_id __rte_unused, + unsigned flags __rte_unused) +{ + rte_errno = EINVAL; + return NULL; +} +#endif + +/* create the mempool */ +struct rte_mempool * +rte_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags) +{ + if (rte_xen_dom0_supported()) + return rte_dom0_mempool_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags); + else + return rte_mempool_xmem_create(name, n, elt_size, + cache_size, private_data_size, + mp_init, mp_init_arg, + obj_init, obj_init_arg, + socket_id, flags, + NULL, NULL, MEMPOOL_PG_NUM_DEFAULT, + MEMPOOL_PG_SHIFT_MAX); +} + +/* + * Create the mempool over already allocated chunk of memory. + * That external memory buffer can consists of physically disjoint pages. + * Setting vaddr to NULL, makes mempool to fallback to original behaviour + * and allocate space for mempool and it's elements as one big chunk of + * physically continuos memory. + * */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift) +{ + char mz_name[RTE_MEMZONE_NAMESIZE]; + char rg_name[RTE_RING_NAMESIZE]; + struct rte_mempool_list *mempool_list; + struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te = NULL; + struct rte_ring *r = NULL; + const struct rte_memzone *mz; + size_t mempool_size; + int mz_flags = RTE_MEMZONE_1GB|RTE_MEMZONE_SIZE_HINT_ONLY; + int rg_flags = 0; + void *obj; + struct rte_mempool_objsz objsz; + void *startaddr; + int page_size = getpagesize(); + + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool) & + RTE_CACHE_LINE_MASK) != 0); +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_cache) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, local_cache) & + RTE_CACHE_LINE_MASK) != 0); +#endif +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + RTE_BUILD_BUG_ON((sizeof(struct rte_mempool_debug_stats) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_mempool, stats) & + RTE_CACHE_LINE_MASK) != 0); +#endif + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + /* asked cache too big */ + if (cache_size > RTE_MEMPOOL_CACHE_MAX_SIZE || + CALC_CACHE_FLUSHTHRESH(cache_size) > n) { + rte_errno = EINVAL; + return NULL; + } + + /* check that we have both VA and PA */ + if (vaddr != NULL && paddr == NULL) { + rte_errno = EINVAL; + return NULL; + } + + /* Check that pg_num and pg_shift parameters are valid. */ + if (pg_num < RTE_DIM(mp->elt_pa) || pg_shift > MEMPOOL_PG_SHIFT_MAX) { + rte_errno = EINVAL; + return NULL; + } + + /* "no cache align" imply "no spread" */ + if (flags & MEMPOOL_F_NO_CACHE_ALIGN) + flags |= MEMPOOL_F_NO_SPREAD; + + /* ring flags */ + if (flags & MEMPOOL_F_SP_PUT) + rg_flags |= RING_F_SP_ENQ; + if (flags & MEMPOOL_F_SC_GET) + rg_flags |= RING_F_SC_DEQ; + + /* calculate mempool object sizes. */ + if (!rte_mempool_calc_obj_size(elt_size, flags, &objsz)) { + rte_errno = EINVAL; + return NULL; + } + + rte_rwlock_write_lock(RTE_EAL_MEMPOOL_RWLOCK); + + /* allocate the ring that will be used to store objects */ + /* Ring functions will return appropriate errors if we are + * running as a secondary process etc., so no checks made + * in this function for that condition */ + snprintf(rg_name, sizeof(rg_name), RTE_MEMPOOL_MZ_FORMAT, name); + r = rte_ring_create(rg_name, rte_align32pow2(n+1), socket_id, rg_flags); + if (r == NULL) + goto exit_unlock; + + /* + * reserve a memory zone for this mempool: private data is + * cache-aligned + */ + private_data_size = (private_data_size + + RTE_MEMPOOL_ALIGN_MASK) & (~RTE_MEMPOOL_ALIGN_MASK); + + if (! rte_eal_has_hugepages()) { + /* + * expand private data size to a whole page, so that the + * first pool element will start on a new standard page + */ + int head = sizeof(struct rte_mempool); + int new_size = (private_data_size + head) % page_size; + if (new_size) { + private_data_size += page_size - new_size; + } + } + + /* try to allocate tailq entry */ + te = rte_zmalloc("MEMPOOL_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, MEMPOOL, "Cannot allocate tailq entry!\n"); + goto exit_unlock; + } + + /* + * If user provided an external memory buffer, then use it to + * store mempool objects. Otherwise reserve a memzone that is large + * enough to hold mempool header and metadata plus mempool objects. + */ + mempool_size = MEMPOOL_HEADER_SIZE(mp, pg_num) + private_data_size; + mempool_size = RTE_ALIGN_CEIL(mempool_size, RTE_MEMPOOL_ALIGN); + if (vaddr == NULL) + mempool_size += (size_t)objsz.total_size * n; + + if (! rte_eal_has_hugepages()) { + /* + * we want the memory pool to start on a page boundary, + * because pool elements crossing page boundaries would + * result in discontiguous physical addresses + */ + mempool_size += page_size; + } + + snprintf(mz_name, sizeof(mz_name), RTE_MEMPOOL_MZ_FORMAT, name); + + mz = rte_memzone_reserve(mz_name, mempool_size, socket_id, mz_flags); + if (mz == NULL) + goto exit_unlock; + + if (rte_eal_has_hugepages()) { + startaddr = (void*)mz->addr; + } else { + /* align memory pool start address on a page boundary */ + unsigned long addr = (unsigned long)mz->addr; + if (addr & (page_size - 1)) { + addr += page_size; + addr &= ~(page_size - 1); + } + startaddr = (void*)addr; + } + + /* init the mempool structure */ + mp = startaddr; + memset(mp, 0, sizeof(*mp)); + snprintf(mp->name, sizeof(mp->name), "%s", name); + mp->phys_addr = mz->phys_addr; + mp->ring = r; + mp->size = n; + mp->flags = flags; + mp->elt_size = objsz.elt_size; + mp->header_size = objsz.header_size; + mp->trailer_size = objsz.trailer_size; + mp->cache_size = cache_size; + mp->cache_flushthresh = CALC_CACHE_FLUSHTHRESH(cache_size); + mp->private_data_size = private_data_size; + + /* calculate address of the first element for continuous mempool. */ + obj = (char *)mp + MEMPOOL_HEADER_SIZE(mp, pg_num) + + private_data_size; + obj = RTE_PTR_ALIGN_CEIL(obj, RTE_MEMPOOL_ALIGN); + + /* populate address translation fields. */ + mp->pg_num = pg_num; + mp->pg_shift = pg_shift; + mp->pg_mask = RTE_LEN2MASK(mp->pg_shift, typeof(mp->pg_mask)); + + /* mempool elements allocated together with mempool */ + if (vaddr == NULL) { + mp->elt_va_start = (uintptr_t)obj; + mp->elt_pa[0] = mp->phys_addr + + (mp->elt_va_start - (uintptr_t)mp); + + /* mempool elements in a separate chunk of memory. */ + } else { + mp->elt_va_start = (uintptr_t)vaddr; + memcpy(mp->elt_pa, paddr, sizeof (mp->elt_pa[0]) * pg_num); + } + + mp->elt_va_end = mp->elt_va_start; + + /* call the initializer */ + if (mp_init) + mp_init(mp, mp_init_arg); + + mempool_populate(mp, n, 1, obj_init, obj_init_arg); + + te->data = (void *) mp; + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_INSERT_TAIL(mempool_list, te, next); + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK); + + return mp; + +exit_unlock: + rte_rwlock_write_unlock(RTE_EAL_MEMPOOL_RWLOCK); + rte_ring_free(r); + rte_free(te); + + return NULL; +} + +/* Return the number of entries in the mempool */ +unsigned +rte_mempool_count(const struct rte_mempool *mp) +{ + unsigned count; + + count = rte_ring_count(mp->ring); + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + { + unsigned lcore_id; + if (mp->cache_size == 0) + return count; + + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) + count += mp->local_cache[lcore_id].len; + } +#endif + + /* + * due to race condition (access to len is not locked), the + * total can be greater than size... so fix the result + */ + if (count > mp->size) + return mp->size; + return count; +} + +/* dump the cache status */ +static unsigned +rte_mempool_dump_cache(FILE *f, const struct rte_mempool *mp) +{ +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + unsigned lcore_id; + unsigned count = 0; + unsigned cache_count; + + fprintf(f, " cache infos:\n"); + fprintf(f, " cache_size=%"PRIu32"\n", mp->cache_size); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + cache_count = mp->local_cache[lcore_id].len; + fprintf(f, " cache_count[%u]=%u\n", lcore_id, cache_count); + count += cache_count; + } + fprintf(f, " total_cache_count=%u\n", count); + return count; +#else + RTE_SET_USED(mp); + fprintf(f, " cache disabled\n"); + return 0; +#endif +} + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +/* check cookies before and after objects */ +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif + +struct mempool_audit_arg { + const struct rte_mempool *mp; + uintptr_t obj_end; + uint32_t obj_num; +}; + +static void +mempool_obj_audit(void *arg, void *start, void *end, uint32_t idx) +{ + struct mempool_audit_arg *pa = arg; + void *obj; + + obj = (char *)start + pa->mp->header_size; + pa->obj_end = (uintptr_t)end; + pa->obj_num = idx + 1; + __mempool_check_cookies(pa->mp, &obj, 1, 2); +} + +static void +mempool_audit_cookies(const struct rte_mempool *mp) +{ + uint32_t elt_sz, num; + struct mempool_audit_arg arg; + + elt_sz = mp->elt_size + mp->header_size + mp->trailer_size; + + arg.mp = mp; + arg.obj_end = mp->elt_va_start; + arg.obj_num = 0; + + num = rte_mempool_obj_iter((void *)mp->elt_va_start, + mp->size, elt_sz, 1, + mp->elt_pa, mp->pg_num, mp->pg_shift, + mempool_obj_audit, &arg); + + if (num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "iterated only over %u elements\n", + mp, mp->size, num); + } else if (arg.obj_end != mp->elt_va_end || arg.obj_num != mp->size) { + rte_panic("rte_mempool_obj_iter(mempool=%p, size=%u) " + "last callback va_end: %#tx (%#tx expeceted), " + "num of objects: %u (%u expected)\n", + mp, mp->size, + arg.obj_end, mp->elt_va_end, + arg.obj_num, mp->size); + } +} + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic error "-Wcast-qual" +#endif +#else +#define mempool_audit_cookies(mp) do {} while(0) +#endif + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +/* check cookies before and after objects */ +static void +mempool_audit_cache(const struct rte_mempool *mp) +{ + /* check cache size consistency */ + unsigned lcore_id; + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + if (mp->local_cache[lcore_id].len > mp->cache_flushthresh) { + RTE_LOG(CRIT, MEMPOOL, "badness on cache[%u]\n", + lcore_id); + rte_panic("MEMPOOL: invalid cache len\n"); + } + } +} +#else +#define mempool_audit_cache(mp) do {} while(0) +#endif + + +/* check the consistency of mempool (size, cookies, ...) */ +void +rte_mempool_audit(const struct rte_mempool *mp) +{ + mempool_audit_cache(mp); + mempool_audit_cookies(mp); + + /* For case where mempool DEBUG is not set, and cache size is 0 */ + RTE_SET_USED(mp); +} + +/* dump the status of the mempool on the console */ +void +rte_mempool_dump(FILE *f, const struct rte_mempool *mp) +{ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + struct rte_mempool_debug_stats sum; + unsigned lcore_id; +#endif + unsigned common_count; + unsigned cache_count; + + RTE_VERIFY(f != NULL); + RTE_VERIFY(mp != NULL); + + fprintf(f, "mempool <%s>@%p\n", mp->name, mp); + fprintf(f, " flags=%x\n", mp->flags); + fprintf(f, " ring=<%s>@%p\n", mp->ring->name, mp->ring); + fprintf(f, " phys_addr=0x%" PRIx64 "\n", mp->phys_addr); + fprintf(f, " size=%"PRIu32"\n", mp->size); + fprintf(f, " header_size=%"PRIu32"\n", mp->header_size); + fprintf(f, " elt_size=%"PRIu32"\n", mp->elt_size); + fprintf(f, " trailer_size=%"PRIu32"\n", mp->trailer_size); + fprintf(f, " total_obj_size=%"PRIu32"\n", + mp->header_size + mp->elt_size + mp->trailer_size); + + fprintf(f, " private_data_size=%"PRIu32"\n", mp->private_data_size); + fprintf(f, " pg_num=%"PRIu32"\n", mp->pg_num); + fprintf(f, " pg_shift=%"PRIu32"\n", mp->pg_shift); + fprintf(f, " pg_mask=%#tx\n", mp->pg_mask); + fprintf(f, " elt_va_start=%#tx\n", mp->elt_va_start); + fprintf(f, " elt_va_end=%#tx\n", mp->elt_va_end); + fprintf(f, " elt_pa[0]=0x%" PRIx64 "\n", mp->elt_pa[0]); + + if (mp->size != 0) + fprintf(f, " avg bytes/object=%#Lf\n", + (long double)(mp->elt_va_end - mp->elt_va_start) / + mp->size); + + cache_count = rte_mempool_dump_cache(f, mp); + common_count = rte_ring_count(mp->ring); + if ((cache_count + common_count) > mp->size) + common_count = mp->size - cache_count; + fprintf(f, " common_pool_count=%u\n", common_count); + + /* sum and dump statistics */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + memset(&sum, 0, sizeof(sum)); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.put_bulk += mp->stats[lcore_id].put_bulk; + sum.put_objs += mp->stats[lcore_id].put_objs; + sum.get_success_bulk += mp->stats[lcore_id].get_success_bulk; + sum.get_success_objs += mp->stats[lcore_id].get_success_objs; + sum.get_fail_bulk += mp->stats[lcore_id].get_fail_bulk; + sum.get_fail_objs += mp->stats[lcore_id].get_fail_objs; + } + fprintf(f, " stats:\n"); + fprintf(f, " put_bulk=%"PRIu64"\n", sum.put_bulk); + fprintf(f, " put_objs=%"PRIu64"\n", sum.put_objs); + fprintf(f, " get_success_bulk=%"PRIu64"\n", sum.get_success_bulk); + fprintf(f, " get_success_objs=%"PRIu64"\n", sum.get_success_objs); + fprintf(f, " get_fail_bulk=%"PRIu64"\n", sum.get_fail_bulk); + fprintf(f, " get_fail_objs=%"PRIu64"\n", sum.get_fail_objs); +#else + fprintf(f, " no statistics available\n"); +#endif + + rte_mempool_audit(mp); +} + +/* dump the status of all mempools on the console */ +void +rte_mempool_list_dump(FILE *f) +{ + const struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + mp = (struct rte_mempool *) te->data; + rte_mempool_dump(f, mp); + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); +} + +/* search a mempool from its name */ +struct rte_mempool * +rte_mempool_lookup(const char *name) +{ + struct rte_mempool *mp = NULL; + struct rte_tailq_entry *te; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + mp = (struct rte_mempool *) te->data; + if (strncmp(name, mp->name, RTE_MEMPOOL_NAMESIZE) == 0) + break; + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return mp; +} + +void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *), + void *arg) +{ + struct rte_tailq_entry *te = NULL; + struct rte_mempool_list *mempool_list; + + mempool_list = RTE_TAILQ_CAST(rte_mempool_tailq.head, rte_mempool_list); + + rte_rwlock_read_lock(RTE_EAL_MEMPOOL_RWLOCK); + + TAILQ_FOREACH(te, mempool_list, next) { + (*func)((struct rte_mempool *) te->data, arg); + } + + rte_rwlock_read_unlock(RTE_EAL_MEMPOOL_RWLOCK); +} diff --git a/lib/librte_mempool/rte_mempool.h b/lib/librte_mempool/rte_mempool.h new file mode 100644 index 00000000..9745bf0d --- /dev/null +++ b/lib/librte_mempool/rte_mempool.h @@ -0,0 +1,1408 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMPOOL_H_ +#define _RTE_MEMPOOL_H_ + +/** + * @file + * RTE Mempool. + * + * A memory pool is an allocator of fixed-size object. It is + * identified by its name, and uses a ring to store free objects. It + * provides some other optional services, like a per-core object + * cache, and an alignment helper to ensure that objects are padded + * to spread them equally on all RAM channels, ranks, and so on. + * + * Objects owned by a mempool should never be added in another + * mempool. When an object is freed using rte_mempool_put() or + * equivalent, the object data is not modified; the user can save some + * meta-data in the object data and retrieve them when allocating a + * new object. + * + * Note: the mempool implementation is not preemptable. A lcore must + * not be interrupted by another task that uses the same mempool + * (because it uses a ring which is not preemptable). Also, mempool + * functions must not be used outside the DPDK environment: for + * example, in linuxapp environment, a thread that is not created by + * the EAL must not use mempools. This is due to the per-lcore cache + * that won't work as rte_lcore_id() will not return a correct value. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MEMPOOL_HEADER_COOKIE1 0xbadbadbadadd2e55ULL /**< Header cookie. */ +#define RTE_MEMPOOL_HEADER_COOKIE2 0xf2eef2eedadd2e55ULL /**< Header cookie. */ +#define RTE_MEMPOOL_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +/** + * A structure that stores the mempool statistics (per-lcore). + */ +struct rte_mempool_debug_stats { + uint64_t put_bulk; /**< Number of puts. */ + uint64_t put_objs; /**< Number of objects successfully put. */ + uint64_t get_success_bulk; /**< Successful allocation number. */ + uint64_t get_success_objs; /**< Objects successfully allocated. */ + uint64_t get_fail_bulk; /**< Failed allocation number. */ + uint64_t get_fail_objs; /**< Objects that failed to be allocated. */ +} __rte_cache_aligned; +#endif + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 +/** + * A structure that stores a per-core object cache. + */ +struct rte_mempool_cache { + unsigned len; /**< Cache len */ + /* + * Cache is allocated to this size to allow it to overflow in certain + * cases to avoid needless emptying of cache. + */ + void *objs[RTE_MEMPOOL_CACHE_MAX_SIZE * 3]; /**< Cache objects */ +} __rte_cache_aligned; +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + +/** + * A structure that stores the size of mempool elements. + */ +struct rte_mempool_objsz { + uint32_t elt_size; /**< Size of an element. */ + uint32_t header_size; /**< Size of header (before elt). */ + uint32_t trailer_size; /**< Size of trailer (after elt). */ + uint32_t total_size; + /**< Total size of an object (header + elt + trailer). */ +}; + +#define RTE_MEMPOOL_NAMESIZE 32 /**< Maximum length of a memory pool. */ +#define RTE_MEMPOOL_MZ_PREFIX "MP_" + +/* "MP_" */ +#define RTE_MEMPOOL_MZ_FORMAT RTE_MEMPOOL_MZ_PREFIX "%s" + +#ifdef RTE_LIBRTE_XEN_DOM0 + +/* "_MP_elt" */ +#define RTE_MEMPOOL_OBJ_NAME "%s_" RTE_MEMPOOL_MZ_PREFIX "elt" + +#else + +#define RTE_MEMPOOL_OBJ_NAME RTE_MEMPOOL_MZ_FORMAT + +#endif /* RTE_LIBRTE_XEN_DOM0 */ + +#define MEMPOOL_PG_SHIFT_MAX (sizeof(uintptr_t) * CHAR_BIT - 1) + +/** Mempool over one chunk of physically continuous memory */ +#define MEMPOOL_PG_NUM_DEFAULT 1 + +#ifndef RTE_MEMPOOL_ALIGN +#define RTE_MEMPOOL_ALIGN RTE_CACHE_LINE_SIZE +#endif + +#define RTE_MEMPOOL_ALIGN_MASK (RTE_MEMPOOL_ALIGN - 1) + +/** + * Mempool object header structure + * + * Each object stored in mempools are prefixed by this header structure, + * it allows to retrieve the mempool pointer from the object. When debug + * is enabled, a cookie is also added in this structure preventing + * corruptions and double-frees. + */ +struct rte_mempool_objhdr { + struct rte_mempool *mp; /**< The mempool owning the object. */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + uint64_t cookie; /**< Debug cookie. */ +#endif +}; + +/** + * Mempool object trailer structure + * + * In debug mode, each object stored in mempools are suffixed by this + * trailer structure containing a cookie preventing memory corruptions. + */ +struct rte_mempool_objtlr { +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + uint64_t cookie; /**< Debug cookie. */ +#endif +}; + +/** + * The RTE mempool structure. + */ +struct rte_mempool { + char name[RTE_MEMPOOL_NAMESIZE]; /**< Name of mempool. */ + struct rte_ring *ring; /**< Ring to store objects. */ + phys_addr_t phys_addr; /**< Phys. addr. of mempool struct. */ + int flags; /**< Flags of the mempool. */ + uint32_t size; /**< Size of the mempool. */ + uint32_t cache_size; /**< Size of per-lcore local cache. */ + uint32_t cache_flushthresh; + /**< Threshold before we flush excess elements. */ + + uint32_t elt_size; /**< Size of an element. */ + uint32_t header_size; /**< Size of header (before elt). */ + uint32_t trailer_size; /**< Size of trailer (after elt). */ + + unsigned private_data_size; /**< Size of private data. */ + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + /** Per-lcore local cache. */ + struct rte_mempool_cache local_cache[RTE_MAX_LCORE]; +#endif + +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + /** Per-lcore statistics. */ + struct rte_mempool_debug_stats stats[RTE_MAX_LCORE]; +#endif + + /* Address translation support, starts from next cache line. */ + + /** Number of elements in the elt_pa array. */ + uint32_t pg_num __rte_cache_aligned; + uint32_t pg_shift; /**< LOG2 of the physical pages. */ + uintptr_t pg_mask; /**< physical page mask value. */ + uintptr_t elt_va_start; + /**< Virtual address of the first mempool object. */ + uintptr_t elt_va_end; + /**< Virtual address of the mempool object. */ + phys_addr_t elt_pa[MEMPOOL_PG_NUM_DEFAULT]; + /**< Array of physical page addresses for the mempool objects buffer. */ + +} __rte_cache_aligned; + +#define MEMPOOL_F_NO_SPREAD 0x0001 /**< Do not spread in memory. */ +#define MEMPOOL_F_NO_CACHE_ALIGN 0x0002 /**< Do not align objs on cache lines.*/ +#define MEMPOOL_F_SP_PUT 0x0004 /**< Default put is "single-producer".*/ +#define MEMPOOL_F_SC_GET 0x0008 /**< Default get is "single-consumer".*/ + +/** + * @internal When debug is enabled, store some statistics. + * + * @param mp + * Pointer to the memory pool. + * @param name + * Name of the statistics field to increment in the memory pool. + * @param n + * Number to add to the object-oriented statistics. + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#define __MEMPOOL_STAT_ADD(mp, name, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ + mp->stats[__lcore_id].name##_objs += n; \ + mp->stats[__lcore_id].name##_bulk += 1; \ + } \ + } while(0) +#else +#define __MEMPOOL_STAT_ADD(mp, name, n) do {} while(0) +#endif + +/** + * Calculate the size of the mempool header. + * + * @param mp + * Pointer to the memory pool. + * @param pgn + * Number of pages used to store mempool objects. + */ +#define MEMPOOL_HEADER_SIZE(mp, pgn) (sizeof(*(mp)) + \ + RTE_ALIGN_CEIL(((pgn) - RTE_DIM((mp)->elt_pa)) * \ + sizeof ((mp)->elt_pa[0]), RTE_CACHE_LINE_SIZE)) + +/** + * Return true if the whole mempool is in contiguous memory. + */ +#define MEMPOOL_IS_CONTIG(mp) \ + ((mp)->pg_num == MEMPOOL_PG_NUM_DEFAULT && \ + (mp)->phys_addr == (mp)->elt_pa[0]) + +/* return the header of a mempool object (internal) */ +static inline struct rte_mempool_objhdr *__mempool_get_header(void *obj) +{ + return (struct rte_mempool_objhdr *)RTE_PTR_SUB(obj, sizeof(struct rte_mempool_objhdr)); +} + +/** + * Return a pointer to the mempool owning this object. + * + * @param obj + * An object that is owned by a pool. If this is not the case, + * the behavior is undefined. + * @return + * A pointer to the mempool structure. + */ +static inline struct rte_mempool *rte_mempool_from_obj(void *obj) +{ + struct rte_mempool_objhdr *hdr = __mempool_get_header(obj); + return hdr->mp; +} + +/* return the trailer of a mempool object (internal) */ +static inline struct rte_mempool_objtlr *__mempool_get_trailer(void *obj) +{ + struct rte_mempool *mp = rte_mempool_from_obj(obj); + return (struct rte_mempool_objtlr *)RTE_PTR_ADD(obj, mp->elt_size); +} + +/** + * @internal Check and update cookies or panic. + * + * @param mp + * Pointer to the memory pool. + * @param obj_table_const + * Pointer to a table of void * pointers (objects). + * @param n + * Index of object in object table. + * @param free + * - 0: object is supposed to be allocated, mark it as free + * - 1: object is supposed to be free, mark it as allocated + * - 2: just check that cookie is valid (free or allocated) + */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wcast-qual" +#endif +static inline void __mempool_check_cookies(const struct rte_mempool *mp, + void * const *obj_table_const, + unsigned n, int free) +{ + struct rte_mempool_objhdr *hdr; + struct rte_mempool_objtlr *tlr; + uint64_t cookie; + void *tmp; + void *obj; + void **obj_table; + + /* Force to drop the "const" attribute. This is done only when + * DEBUG is enabled */ + tmp = (void *) obj_table_const; + obj_table = (void **) tmp; + + while (n--) { + obj = obj_table[n]; + + if (rte_mempool_from_obj(obj) != mp) + rte_panic("MEMPOOL: object is owned by another " + "mempool\n"); + + hdr = __mempool_get_header(obj); + cookie = hdr->cookie; + + if (free == 0) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE1) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (put)\n"); + } + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE2; + } + else if (free == 1) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE2) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (get)\n"); + } + hdr->cookie = RTE_MEMPOOL_HEADER_COOKIE1; + } + else if (free == 2) { + if (cookie != RTE_MEMPOOL_HEADER_COOKIE1 && + cookie != RTE_MEMPOOL_HEADER_COOKIE2) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad header cookie (audit)\n"); + } + } + tlr = __mempool_get_trailer(obj); + cookie = tlr->cookie; + if (cookie != RTE_MEMPOOL_TRAILER_COOKIE) { + rte_log_set_history(0); + RTE_LOG(CRIT, MEMPOOL, + "obj=%p, mempool=%p, cookie=%" PRIx64 "\n", + obj, (const void *) mp, cookie); + rte_panic("MEMPOOL: bad trailer cookie\n"); + } + } +} +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic error "-Wcast-qual" +#endif +#else +#define __mempool_check_cookies(mp, obj_table_const, n, free) do {} while(0) +#endif /* RTE_LIBRTE_MEMPOOL_DEBUG */ + +/** + * A mempool object iterator callback function. + */ +typedef void (*rte_mempool_obj_iter_t)(void * /*obj_iter_arg*/, + void * /*obj_start*/, + void * /*obj_end*/, + uint32_t /*obj_index */); + +/** + * Call a function for each mempool object in a memory chunk + * + * Iterate across objects of the given size and alignment in the + * provided chunk of memory. The given memory buffer can consist of + * disjointed physical pages. + * + * For each object, call the provided callback (if any). This function + * is used to populate a mempool, or walk through all the elements of a + * mempool, or estimate how many elements of the given size could be + * created in the given memory buffer. + * + * @param vaddr + * Virtual address of the memory buffer. + * @param elt_num + * Maximum number of objects to iterate through. + * @param elt_sz + * Size of each object. + * @param align + * Alignment of each object. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @param obj_iter + * Object iterator callback function (could be NULL). + * @param obj_iter_arg + * User defined parameter for the object iterator callback function. + * + * @return + * Number of objects iterated through. + */ +uint32_t rte_mempool_obj_iter(void *vaddr, + uint32_t elt_num, size_t elt_sz, size_t align, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift, + rte_mempool_obj_iter_t obj_iter, void *obj_iter_arg); + +/** + * An object constructor callback function for mempool. + * + * Arguments are the mempool, the opaque pointer given by the user in + * rte_mempool_create(), the pointer to the element and the index of + * the element in the pool. + */ +typedef void (rte_mempool_obj_ctor_t)(struct rte_mempool *, void *, + void *, unsigned); + +/** + * A mempool constructor callback function. + * + * Arguments are the mempool and the opaque pointer given by the user in + * rte_mempool_create(). + */ +typedef void (rte_mempool_ctor_t)(struct rte_mempool *, void *); + +/** + * Create a new mempool named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * in one physically continuous chunk of memory. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE and n / 1.5. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + +/** + * Create a new mempool named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * Depending on the input parameters, mempool elements can be either allocated + * together with the mempool header, or an externally provided memory buffer + * could be used to store mempool objects. In later case, that external + * memory buffer can consist of set of disjoint physical pages. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @param vaddr + * Virtual address of the externally allocated memory buffer. + * Will be used to store mempool objects. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_mempool_xmem_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags, void *vaddr, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + +/** + * Create a new mempool named *name* in memory on Xen Dom0. + * + * This function uses ``rte_mempool_xmem_create()`` to allocate memory. The + * pool contains n elements of elt_size. Its size is set to n. + * All elements of the mempool are allocated together with the mempool header, + * and memory buffer can consist of set of disjoint physical pages. + * + * @param name + * The name of the mempool. + * @param n + * The number of elements in the mempool. The optimum size (in terms of + * memory usage) for a mempool is when n is a power of two minus one: + * n = (2^q - 1). + * @param elt_size + * The size of each element. + * @param cache_size + * If cache_size is non-zero, the rte_mempool library will try to + * limit the accesses to the common lockless pool, by maintaining a + * per-lcore object cache. This argument must be lower or equal to + * CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE. It is advised to choose + * cache_size to have "n modulo cache_size == 0": if this is + * not the case, some elements will always stay in the pool and will + * never be used. The access to the per-lcore table is of course + * faster than the multi-producer/consumer pool. The cache can be + * disabled if the cache_size argument is set to 0; it can be useful to + * avoid losing objects in cache. Note that even if not used, the + * memory space for cache is always reserved in a mempool structure, + * except if CONFIG_RTE_MEMPOOL_CACHE_MAX_SIZE is set to 0. + * @param private_data_size + * The size of the private data appended after the mempool + * structure. This is useful for storing some private data after the + * mempool structure, as is done for rte_mbuf_pool for example. + * @param mp_init + * A function pointer that is called for initialization of the pool, + * before object initialization. The user can initialize the private + * data in this function if needed. This parameter can be NULL if + * not needed. + * @param mp_init_arg + * An opaque pointer to data that can be used in the mempool + * constructor function. + * @param obj_init + * A function pointer that is called for each object at + * initialization of the pool. The user can set some meta data in + * objects if needed. This parameter can be NULL if not needed. + * The obj_init() function takes the mempool pointer, the init_arg, + * the object pointer and the object number as parameters. + * @param obj_init_arg + * An opaque pointer to data that can be used as an argument for + * each call to the object constructor function. + * @param socket_id + * The *socket_id* argument is the socket identifier in the case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The *flags* arguments is an OR of following flags: + * - MEMPOOL_F_NO_SPREAD: By default, objects addresses are spread + * between channels in RAM: the pool allocator will add padding + * between objects depending on the hardware configuration. See + * Memory alignment constraints for details. If this flag is set, + * the allocator will just align them to a cache line. + * - MEMPOOL_F_NO_CACHE_ALIGN: By default, the returned objects are + * cache-aligned. This flag removes this constraint, and no + * padding will be present between objects. This flag implies + * MEMPOOL_F_NO_SPREAD. + * - MEMPOOL_F_SP_PUT: If this flag is set, the default behavior + * when using rte_mempool_put() or rte_mempool_put_bulk() is + * "single-producer". Otherwise, it is "multi-producers". + * - MEMPOOL_F_SC_GET: If this flag is set, the default behavior + * when using rte_mempool_get() or rte_mempool_get_bulk() is + * "single-consumer". Otherwise, it is "multi-consumers". + * @return + * The pointer to the new allocated mempool, on success. NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - cache size provided is too large + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_mempool * +rte_dom0_mempool_create(const char *name, unsigned n, unsigned elt_size, + unsigned cache_size, unsigned private_data_size, + rte_mempool_ctor_t *mp_init, void *mp_init_arg, + rte_mempool_obj_ctor_t *obj_init, void *obj_init_arg, + int socket_id, unsigned flags); + + +/** + * Dump the status of the mempool to the console. + * + * @param f + * A pointer to a file for output + * @param mp + * A pointer to the mempool structure. + */ +void rte_mempool_dump(FILE *f, const struct rte_mempool *mp); + +/** + * @internal Put several objects back in the mempool; used internally. + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to store back in the mempool, must be strictly + * positive. + * @param is_mp + * Mono-producer (0) or multi-producers (1). + */ +static inline void __attribute__((always_inline)) +__mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n, int is_mp) +{ +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + struct rte_mempool_cache *cache; + uint32_t index; + void **cache_objs; + unsigned lcore_id = rte_lcore_id(); + uint32_t cache_size = mp->cache_size; + uint32_t flushthresh = mp->cache_flushthresh; +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* increment stat now, adding in mempool always success */ + __MEMPOOL_STAT_ADD(mp, put, n); + +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + /* cache is not enabled or single producer or non-EAL thread */ + if (unlikely(cache_size == 0 || is_mp == 0 || + lcore_id >= RTE_MAX_LCORE)) + goto ring_enqueue; + + /* Go straight to ring if put would overflow mem allocated for cache */ + if (unlikely(n > RTE_MEMPOOL_CACHE_MAX_SIZE)) + goto ring_enqueue; + + cache = &mp->local_cache[lcore_id]; + cache_objs = &cache->objs[cache->len]; + + /* + * The cache follows the following algorithm + * 1. Add the objects to the cache + * 2. Anything greater than the cache min value (if it crosses the + * cache flush threshold) is flushed to the ring. + */ + + /* Add elements back into the cache */ + for (index = 0; index < n; ++index, obj_table++) + cache_objs[index] = *obj_table; + + cache->len += n; + + if (cache->len >= flushthresh) { + rte_ring_mp_enqueue_bulk(mp->ring, &cache->objs[cache_size], + cache->len - cache_size); + cache->len = cache_size; + } + + return; + +ring_enqueue: +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* push remaining objects in ring */ +#ifdef RTE_LIBRTE_MEMPOOL_DEBUG + if (is_mp) { + if (rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n) < 0) + rte_panic("cannot put objects in mempool\n"); + } + else { + if (rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n) < 0) + rte_panic("cannot put objects in mempool\n"); + } +#else + if (is_mp) + rte_ring_mp_enqueue_bulk(mp->ring, obj_table, n); + else + rte_ring_sp_enqueue_bulk(mp->ring, obj_table, n); +#endif +} + + +/** + * Put several objects back in the mempool (multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from the obj_table. + */ +static inline void __attribute__((always_inline)) +rte_mempool_mp_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, 1); +} + +/** + * Put several objects back in the mempool (NOT multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from obj_table. + */ +static inline void +rte_mempool_sp_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, 0); +} + +/** + * Put several objects back in the mempool. + * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * mempool creation time (see flags). + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the mempool from obj_table. + */ +static inline void __attribute__((always_inline)) +rte_mempool_put_bulk(struct rte_mempool *mp, void * const *obj_table, + unsigned n) +{ + __mempool_check_cookies(mp, obj_table, n, 0); + __mempool_put_bulk(mp, obj_table, n, !(mp->flags & MEMPOOL_F_SP_PUT)); +} + +/** + * Put one object in the mempool (multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_mp_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_mp_put_bulk(mp, &obj, 1); +} + +/** + * Put one object back in the mempool (NOT multi-producers safe). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_sp_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_sp_put_bulk(mp, &obj, 1); +} + +/** + * Put one object back in the mempool. + * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * mempool creation time (see flags). + * + * @param mp + * A pointer to the mempool structure. + * @param obj + * A pointer to the object to be added. + */ +static inline void __attribute__((always_inline)) +rte_mempool_put(struct rte_mempool *mp, void *obj) +{ + rte_mempool_put_bulk(mp, &obj, 1); +} + +/** + * @internal Get several objects from the mempool; used internally. + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to get, must be strictly positive. + * @param is_mc + * Mono-consumer (0) or multi-consumers (1). + * @return + * - >=0: Success; number of objects supplied. + * - <0: Error; code of ring dequeue function. + */ +static inline int __attribute__((always_inline)) +__mempool_get_bulk(struct rte_mempool *mp, void **obj_table, + unsigned n, int is_mc) +{ + int ret; +#if RTE_MEMPOOL_CACHE_MAX_SIZE > 0 + struct rte_mempool_cache *cache; + uint32_t index, len; + void **cache_objs; + unsigned lcore_id = rte_lcore_id(); + uint32_t cache_size = mp->cache_size; + + /* cache is not enabled or single consumer */ + if (unlikely(cache_size == 0 || is_mc == 0 || + n >= cache_size || lcore_id >= RTE_MAX_LCORE)) + goto ring_dequeue; + + cache = &mp->local_cache[lcore_id]; + cache_objs = cache->objs; + + /* Can this be satisfied from the cache? */ + if (cache->len < n) { + /* No. Backfill the cache first, and then fill from it */ + uint32_t req = n + (cache_size - cache->len); + + /* How many do we require i.e. number to fill the cache + the request */ + ret = rte_ring_mc_dequeue_bulk(mp->ring, &cache->objs[cache->len], req); + if (unlikely(ret < 0)) { + /* + * In the offchance that we are buffer constrained, + * where we are not able to allocate cache + n, go to + * the ring directly. If that fails, we are truly out of + * buffers. + */ + goto ring_dequeue; + } + + cache->len += req; + } + + /* Now fill in the response ... */ + for (index = 0, len = cache->len - 1; index < n; ++index, len--, obj_table++) + *obj_table = cache_objs[len]; + + cache->len -= n; + + __MEMPOOL_STAT_ADD(mp, get_success, n); + + return 0; + +ring_dequeue: +#endif /* RTE_MEMPOOL_CACHE_MAX_SIZE > 0 */ + + /* get remaining objects from ring */ + if (is_mc) + ret = rte_ring_mc_dequeue_bulk(mp->ring, obj_table, n); + else + ret = rte_ring_sc_dequeue_bulk(mp->ring, obj_table, n); + + if (ret < 0) + __MEMPOOL_STAT_ADD(mp, get_fail, n); + else + __MEMPOOL_STAT_ADD(mp, get_success, n); + + return ret; +} + +/** + * Get several objects from the mempool (multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to get from mempool to obj_table. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_mc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, 1); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get several objects from the mempool (NOT multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to get from the mempool to obj_table. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is + * retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_sc_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, 0); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get several objects from the mempool. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behaviour that was specified at + * mempool creation time (see flags). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to get from the mempool to obj_table. + * @return + * - 0: Success; objects taken + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_get_bulk(struct rte_mempool *mp, void **obj_table, unsigned n) +{ + int ret; + ret = __mempool_get_bulk(mp, obj_table, n, + !(mp->flags & MEMPOOL_F_SC_GET)); + if (ret == 0) + __mempool_check_cookies(mp, obj_table, n, 1); + return ret; +} + +/** + * Get one object from the mempool (multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_mc_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_mc_get_bulk(mp, obj_p, 1); +} + +/** + * Get one object from the mempool (NOT multi-consumers safe). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_sc_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_sc_get_bulk(mp, obj_p, 1); +} + +/** + * Get one object from the mempool. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behavior that was specified at + * mempool creation (see flags). + * + * If cache is enabled, objects will be retrieved first from cache, + * subsequently from the common pool. Note that it can return -ENOENT when + * the local cache and common pool are empty, even if cache from other + * lcores are full. + * + * @param mp + * A pointer to the mempool structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects taken. + * - -ENOENT: Not enough entries in the mempool; no object is retrieved. + */ +static inline int __attribute__((always_inline)) +rte_mempool_get(struct rte_mempool *mp, void **obj_p) +{ + return rte_mempool_get_bulk(mp, obj_p, 1); +} + +/** + * Return the number of entries in the mempool. + * + * When cache is enabled, this function has to browse the length of + * all lcores, so it should not be used in a data path, but only for + * debug purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * The number of entries in the mempool. + */ +unsigned rte_mempool_count(const struct rte_mempool *mp); + +/** + * Return the number of free entries in the mempool ring. + * i.e. how many entries can be freed back to the mempool. + * + * NOTE: This corresponds to the number of elements *allocated* from the + * memory pool, not the number of elements in the pool itself. To count + * the number elements currently available in the pool, use "rte_mempool_count" + * + * When cache is enabled, this function has to browse the length of + * all lcores, so it should not be used in a data path, but only for + * debug purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * The number of free entries in the mempool. + */ +static inline unsigned +rte_mempool_free_count(const struct rte_mempool *mp) +{ + return mp->size - rte_mempool_count(mp); +} + +/** + * Test if the mempool is full. + * + * When cache is enabled, this function has to browse the length of all + * lcores, so it should not be used in a data path, but only for debug + * purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * - 1: The mempool is full. + * - 0: The mempool is not full. + */ +static inline int +rte_mempool_full(const struct rte_mempool *mp) +{ + return !!(rte_mempool_count(mp) == mp->size); +} + +/** + * Test if the mempool is empty. + * + * When cache is enabled, this function has to browse the length of all + * lcores, so it should not be used in a data path, but only for debug + * purposes. + * + * @param mp + * A pointer to the mempool structure. + * @return + * - 1: The mempool is empty. + * - 0: The mempool is not empty. + */ +static inline int +rte_mempool_empty(const struct rte_mempool *mp) +{ + return !!(rte_mempool_count(mp) == 0); +} + +/** + * Return the physical address of elt, which is an element of the pool mp. + * + * @param mp + * A pointer to the mempool structure. + * @param elt + * A pointer (virtual address) to the element of the pool. + * @return + * The physical address of the elt element. + */ +static inline phys_addr_t +rte_mempool_virt2phy(const struct rte_mempool *mp, const void *elt) +{ + if (rte_eal_has_hugepages()) { + uintptr_t off; + + off = (const char *)elt - (const char *)mp->elt_va_start; + return mp->elt_pa[off >> mp->pg_shift] + (off & mp->pg_mask); + } else { + /* + * If huge pages are disabled, we cannot assume the + * memory region to be physically contiguous. + * Lookup for each element. + */ + return rte_mem_virt2phy(elt); + } +} + +/** + * Check the consistency of mempool objects. + * + * Verify the coherency of fields in the mempool structure. Also check + * that the cookies of mempool objects (even the ones that are not + * present in pool) have a correct value. If not, a panic will occur. + * + * @param mp + * A pointer to the mempool structure. + */ +void rte_mempool_audit(const struct rte_mempool *mp); + +/** + * Return a pointer to the private data in an mempool structure. + * + * @param mp + * A pointer to the mempool structure. + * @return + * A pointer to the private data. + */ +static inline void *rte_mempool_get_priv(struct rte_mempool *mp) +{ + return (char *)mp + MEMPOOL_HEADER_SIZE(mp, mp->pg_num); +} + +/** + * Dump the status of all mempools on the console + * + * @param f + * A pointer to a file for output + */ +void rte_mempool_list_dump(FILE *f); + +/** + * Search a mempool from its name + * + * @param name + * The name of the mempool. + * @return + * The pointer to the mempool matching the name, or NULL if not found. + * NULL on error + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + * + */ +struct rte_mempool *rte_mempool_lookup(const char *name); + +/** + * Get the header, trailer and total size of a mempool element. + * + * Given a desired size of the mempool element and mempool flags, + * calculates header, trailer, body and total sizes of the mempool object. + * + * @param elt_size + * The size of each element. + * @param flags + * The flags used for the mempool creation. + * Consult rte_mempool_create() for more information about possible values. + * The size of each element. + * @param sz + * The calculated detailed size the mempool object. May be NULL. + * @return + * Total size of the mempool object. + */ +uint32_t rte_mempool_calc_obj_size(uint32_t elt_size, uint32_t flags, + struct rte_mempool_objsz *sz); + +/** + * Get the size of memory required to store mempool elements. + * + * Calculate the maximum amount of memory required to store given number + * of objects. Assume that the memory buffer will be aligned at page + * boundary. + * + * Note that if object size is bigger then page size, then it assumes + * that pages are grouped in subsets of physically continuous pages big + * enough to store at least one object. + * + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * Required memory size aligned at page boundary. + */ +size_t rte_mempool_xmem_size(uint32_t elt_num, size_t elt_sz, + uint32_t pg_shift); + +/** + * Get the size of memory required to store mempool elements. + * + * Calculate how much memory would be actually required with the given + * memory footprint to store required number of objects. + * + * @param vaddr + * Virtual address of the externally allocated memory buffer. + * Will be used to store mempool objects. + * @param elt_num + * Number of elements. + * @param elt_sz + * The size of each element. + * @param paddr + * Array of physical addresses of the pages that comprises given memory + * buffer. + * @param pg_num + * Number of elements in the paddr array. + * @param pg_shift + * LOG2 of the physical pages size. + * @return + * On success, the number of bytes needed to store given number of + * objects, aligned to the given page size. If the provided memory + * buffer is too small, return a negative value whose absolute value + * is the actual number of elements that can be stored in that buffer. + */ +ssize_t rte_mempool_xmem_usage(void *vaddr, uint32_t elt_num, size_t elt_sz, + const phys_addr_t paddr[], uint32_t pg_num, uint32_t pg_shift); + +/** + * Walk list of all memory pools + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + */ +void rte_mempool_walk(void (*func)(const struct rte_mempool *, void *arg), + void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMPOOL_H_ */ diff --git a/lib/librte_mempool/rte_mempool_version.map b/lib/librte_mempool/rte_mempool_version.map new file mode 100644 index 00000000..17151e08 --- /dev/null +++ b/lib/librte_mempool/rte_mempool_version.map @@ -0,0 +1,19 @@ +DPDK_2.0 { + global: + + rte_dom0_mempool_create; + rte_mempool_audit; + rte_mempool_calc_obj_size; + rte_mempool_count; + rte_mempool_create; + rte_mempool_dump; + rte_mempool_list_dump; + rte_mempool_lookup; + rte_mempool_obj_iter; + rte_mempool_walk; + rte_mempool_xmem_create; + rte_mempool_xmem_size; + rte_mempool_xmem_usage; + + local: *; +}; diff --git a/lib/librte_meter/Makefile b/lib/librte_meter/Makefile new file mode 100644 index 00000000..f07fced7 --- /dev/null +++ b/lib/librte_meter/Makefile @@ -0,0 +1,59 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_meter.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +LDLIBS += -lm + +EXPORT_MAP := rte_meter_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_METER) := rte_meter.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_METER)-include := rte_meter.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_METER) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_meter/rte_meter.c b/lib/librte_meter/rte_meter.c new file mode 100644 index 00000000..5e2dadb8 --- /dev/null +++ b/lib/librte_meter/rte_meter.c @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include + +#include "rte_meter.h" + +#ifndef RTE_METER_TB_PERIOD_MIN +#define RTE_METER_TB_PERIOD_MIN 100 +#endif + +static void +rte_meter_get_tb_params(uint64_t hz, uint64_t rate, uint64_t *tb_period, uint64_t *tb_bytes_per_period) +{ + double period = ((double) hz) / ((double) rate); + + if (period >= RTE_METER_TB_PERIOD_MIN) { + *tb_bytes_per_period = 1; + *tb_period = (uint64_t) period; + } else { + *tb_bytes_per_period = (uint64_t) ceil(RTE_METER_TB_PERIOD_MIN / period); + *tb_period = (hz * (*tb_bytes_per_period)) / rate; + } +} + +int +rte_meter_srtcm_config(struct rte_meter_srtcm *m, struct rte_meter_srtcm_params *params) +{ + uint64_t hz; + + /* Check input parameters */ + if ((m == NULL) || (params == NULL)) { + return -1; + } + + if ((params->cir == 0) || ((params->cbs == 0) && (params->ebs == 0))) { + return -2; + } + + /* Initialize srTCM run-time structure */ + hz = rte_get_tsc_hz(); + m->time = rte_get_tsc_cycles(); + m->tc = m->cbs = params->cbs; + m->te = m->ebs = params->ebs; + rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period); + + RTE_LOG(INFO, METER, "Low level srTCM config: \n" + "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n", + m->cir_period, m->cir_bytes_per_period); + + return 0; +} + +int +rte_meter_trtcm_config(struct rte_meter_trtcm *m, struct rte_meter_trtcm_params *params) +{ + uint64_t hz; + + /* Check input parameters */ + if ((m == NULL) || (params == NULL)) { + return -1; + } + + if ((params->cir == 0) || (params->pir == 0) || (params->pir < params->cir) || + (params->cbs == 0) || (params->pbs == 0)) { + return -2; + } + + /* Initialize trTCM run-time structure */ + hz = rte_get_tsc_hz(); + m->time_tc = m->time_tp = rte_get_tsc_cycles(); + m->tc = m->cbs = params->cbs; + m->tp = m->pbs = params->pbs; + rte_meter_get_tb_params(hz, params->cir, &m->cir_period, &m->cir_bytes_per_period); + rte_meter_get_tb_params(hz, params->pir, &m->pir_period, &m->pir_bytes_per_period); + + RTE_LOG(INFO, METER, "Low level trTCM config: \n" + "\tCIR period = %" PRIu64 ", CIR bytes per period = %" PRIu64 "\n" + "\tPIR period = %" PRIu64 ", PIR bytes per period = %" PRIu64 "\n", + m->cir_period, m->cir_bytes_per_period, + m->pir_period, m->pir_bytes_per_period); + + return 0; +} diff --git a/lib/librte_meter/rte_meter.h b/lib/librte_meter/rte_meter.h new file mode 100644 index 00000000..2cd8d814 --- /dev/null +++ b/lib/librte_meter/rte_meter.h @@ -0,0 +1,387 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_METER_H__ +#define __INCLUDE_RTE_METER_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Traffic Metering + * + * Traffic metering algorithms: + * 1. Single Rate Three Color Marker (srTCM): defined by IETF RFC 2697 + * 2. Two Rate Three Color Marker (trTCM): defined by IETF RFC 2698 + * + ***/ + +#include + +/* + * Application Programmer's Interface (API) + * + ***/ + +/** Packet Color Set */ +enum rte_meter_color { + e_RTE_METER_GREEN = 0, /**< Green */ + e_RTE_METER_YELLOW, /**< Yellow */ + e_RTE_METER_RED, /**< Red */ + e_RTE_METER_COLORS /**< Number of available colors */ +}; + +/** srTCM parameters per metered traffic flow. The CIR, CBS and EBS parameters only +count bytes of IP packets and do not include link specific headers. At least one of +the CBS or EBS parameters has to be greater than zero. */ +struct rte_meter_srtcm_params { + uint64_t cir; /**< Committed Information Rate (CIR). Measured in bytes per second. */ + uint64_t cbs; /**< Committed Burst Size (CBS). Measured in bytes. */ + uint64_t ebs; /**< Excess Burst Size (EBS). Measured in bytes. */ +}; + +/** trTCM parameters per metered traffic flow. The CIR, PIR, CBS and PBS parameters +only count bytes of IP packets and do not include link specific headers. PIR has to +be greater than or equal to CIR. Both CBS or EBS have to be greater than zero. */ +struct rte_meter_trtcm_params { + uint64_t cir; /**< Committed Information Rate (CIR). Measured in bytes per second. */ + uint64_t pir; /**< Peak Information Rate (PIR). Measured in bytes per second. */ + uint64_t cbs; /**< Committed Burst Size (CBS). Measured in byes. */ + uint64_t pbs; /**< Peak Burst Size (PBS). Measured in bytes. */ +}; + +/** Internal data structure storing the srTCM run-time context per metered traffic flow. */ +struct rte_meter_srtcm; + +/** Internal data structure storing the trTCM run-time context per metered traffic flow. */ +struct rte_meter_trtcm; + +/** + * srTCM configuration per metered traffic flow + * + * @param m + * Pointer to pre-allocated srTCM data structure + * @param params + * User parameters per srTCM metered traffic flow + * @return + * 0 upon success, error code otherwise + */ +int +rte_meter_srtcm_config(struct rte_meter_srtcm *m, + struct rte_meter_srtcm_params *params); + +/** + * trTCM configuration per metered traffic flow + * + * @param m + * Pointer to pre-allocated trTCM data structure + * @param params + * User parameters per trTCM metered traffic flow + * @return + * 0 upon success, error code otherwise + */ +int +rte_meter_trtcm_config(struct rte_meter_trtcm *m, + struct rte_meter_trtcm_params *params); + +/** + * srTCM color blind traffic metering + * + * @param m + * Handle to srTCM instance + * @param time + * Current CPU time stamp (measured in CPU cycles) + * @param pkt_len + * Length of the current IP packet (measured in bytes) + * @return + * Color assigned to the current IP packet + */ +static inline enum rte_meter_color +rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, + uint64_t time, + uint32_t pkt_len); + +/** + * srTCM color aware traffic metering + * + * @param m + * Handle to srTCM instance + * @param time + * Current CPU time stamp (measured in CPU cycles) + * @param pkt_len + * Length of the current IP packet (measured in bytes) + * @param pkt_color + * Input color of the current IP packet + * @return + * Color assigned to the current IP packet + */ +static inline enum rte_meter_color +rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, + uint64_t time, + uint32_t pkt_len, + enum rte_meter_color pkt_color); + +/** + * trTCM color blind traffic metering + * + * @param m + * Handle to trTCM instance + * @param time + * Current CPU time stamp (measured in CPU cycles) + * @param pkt_len + * Length of the current IP packet (measured in bytes) + * @return + * Color assigned to the current IP packet + */ +static inline enum rte_meter_color +rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, + uint64_t time, + uint32_t pkt_len); + +/** + * trTCM color aware traffic metering + * + * @param m + * Handle to trTCM instance + * @param time + * Current CPU time stamp (measured in CPU cycles) + * @param pkt_len + * Length of the current IP packet (measured in bytes) + * @param pkt_color + * Input color of the current IP packet + * @return + * Color assigned to the current IP packet + */ +static inline enum rte_meter_color +rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, + uint64_t time, + uint32_t pkt_len, + enum rte_meter_color pkt_color); + +/* + * Inline implementation of run-time methods + * + ***/ + +/* Internal data structure storing the srTCM run-time context per metered traffic flow. */ +struct rte_meter_srtcm { + uint64_t time; /* Time of latest update of C and E token buckets */ + uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */ + uint64_t te; /* Number of bytes currently available in the excess (E) token bucket */ + uint64_t cbs; /* Upper limit for C token bucket */ + uint64_t ebs; /* Upper limit for E token bucket */ + uint64_t cir_period; /* Number of CPU cycles for one update of C and E token buckets */ + uint64_t cir_bytes_per_period; /* Number of bytes to add to C and E token buckets on each update */ +}; + +/* Internal data structure storing the trTCM run-time context per metered traffic flow. */ +struct rte_meter_trtcm { + uint64_t time_tc; /* Time of latest update of C token bucket */ + uint64_t time_tp; /* Time of latest update of E token bucket */ + uint64_t tc; /* Number of bytes currently available in the committed (C) token bucket */ + uint64_t tp; /* Number of bytes currently available in the peak (P) token bucket */ + uint64_t cbs; /* Upper limit for C token bucket */ + uint64_t pbs; /* Upper limit for P token bucket */ + uint64_t cir_period; /* Number of CPU cycles for one update of C token bucket */ + uint64_t cir_bytes_per_period; /* Number of bytes to add to C token bucket on each update */ + uint64_t pir_period; /* Number of CPU cycles for one update of P token bucket */ + uint64_t pir_bytes_per_period; /* Number of bytes to add to P token bucket on each update */ +}; + +static inline enum rte_meter_color +rte_meter_srtcm_color_blind_check(struct rte_meter_srtcm *m, + uint64_t time, + uint32_t pkt_len) +{ + uint64_t time_diff, n_periods, tc, te; + + /* Bucket update */ + time_diff = time - m->time; + n_periods = time_diff / m->cir_period; + m->time += n_periods * m->cir_period; + + tc = m->tc + n_periods * m->cir_bytes_per_period; + if (tc > m->cbs) + tc = m->cbs; + + te = m->te + n_periods * m->cir_bytes_per_period; + if (te > m->ebs) + te = m->ebs; + + /* Color logic */ + if (tc >= pkt_len) { + m->tc = tc - pkt_len; + m->te = te; + return e_RTE_METER_GREEN; + } + + if (te >= pkt_len) { + m->tc = tc; + m->te = te - pkt_len; + return e_RTE_METER_YELLOW; + } + + m->tc = tc; + m->te = te; + return e_RTE_METER_RED; +} + +static inline enum rte_meter_color +rte_meter_srtcm_color_aware_check(struct rte_meter_srtcm *m, + uint64_t time, + uint32_t pkt_len, + enum rte_meter_color pkt_color) +{ + uint64_t time_diff, n_periods, tc, te; + + /* Bucket update */ + time_diff = time - m->time; + n_periods = time_diff / m->cir_period; + m->time += n_periods * m->cir_period; + + tc = m->tc + n_periods * m->cir_bytes_per_period; + if (tc > m->cbs) + tc = m->cbs; + + te = m->te + n_periods * m->cir_bytes_per_period; + if (te > m->ebs) + te = m->ebs; + + /* Color logic */ + if ((pkt_color == e_RTE_METER_GREEN) && (tc >= pkt_len)) { + m->tc = tc - pkt_len; + m->te = te; + return e_RTE_METER_GREEN; + } + + if ((pkt_color != e_RTE_METER_RED) && (te >= pkt_len)) { + m->tc = tc; + m->te = te - pkt_len; + return e_RTE_METER_YELLOW; + } + + m->tc = tc; + m->te = te; + return e_RTE_METER_RED; +} + +static inline enum rte_meter_color +rte_meter_trtcm_color_blind_check(struct rte_meter_trtcm *m, + uint64_t time, + uint32_t pkt_len) +{ + uint64_t time_diff_tc, time_diff_tp, n_periods_tc, n_periods_tp, tc, tp; + + /* Bucket update */ + time_diff_tc = time - m->time_tc; + time_diff_tp = time - m->time_tp; + n_periods_tc = time_diff_tc / m->cir_period; + n_periods_tp = time_diff_tp / m->pir_period; + m->time_tc += n_periods_tc * m->cir_period; + m->time_tp += n_periods_tp * m->pir_period; + + tc = m->tc + n_periods_tc * m->cir_bytes_per_period; + if (tc > m->cbs) + tc = m->cbs; + + tp = m->tp + n_periods_tp * m->pir_bytes_per_period; + if (tp > m->pbs) + tp = m->pbs; + + /* Color logic */ + if (tp < pkt_len) { + m->tc = tc; + m->tp = tp; + return e_RTE_METER_RED; + } + + if (tc < pkt_len) { + m->tc = tc; + m->tp = tp - pkt_len; + return e_RTE_METER_YELLOW; + } + + m->tc = tc - pkt_len; + m->tp = tp - pkt_len; + return e_RTE_METER_GREEN; +} + +static inline enum rte_meter_color +rte_meter_trtcm_color_aware_check(struct rte_meter_trtcm *m, + uint64_t time, + uint32_t pkt_len, + enum rte_meter_color pkt_color) +{ + uint64_t time_diff_tc, time_diff_tp, n_periods_tc, n_periods_tp, tc, tp; + + /* Bucket update */ + time_diff_tc = time - m->time_tc; + time_diff_tp = time - m->time_tp; + n_periods_tc = time_diff_tc / m->cir_period; + n_periods_tp = time_diff_tp / m->pir_period; + m->time_tc += n_periods_tc * m->cir_period; + m->time_tp += n_periods_tp * m->pir_period; + + tc = m->tc + n_periods_tc * m->cir_bytes_per_period; + if (tc > m->cbs) + tc = m->cbs; + + tp = m->tp + n_periods_tp * m->pir_bytes_per_period; + if (tp > m->pbs) + tp = m->pbs; + + /* Color logic */ + if ((pkt_color == e_RTE_METER_RED) || (tp < pkt_len)) { + m->tc = tc; + m->tp = tp; + return e_RTE_METER_RED; + } + + if ((pkt_color == e_RTE_METER_YELLOW) || (tc < pkt_len)) { + m->tc = tc; + m->tp = tp - pkt_len; + return e_RTE_METER_YELLOW; + } + + m->tc = tc - pkt_len; + m->tp = tp - pkt_len; + return e_RTE_METER_GREEN; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_METER_H__ */ diff --git a/lib/librte_meter/rte_meter_version.map b/lib/librte_meter/rte_meter_version.map new file mode 100644 index 00000000..2fd647c6 --- /dev/null +++ b/lib/librte_meter/rte_meter_version.map @@ -0,0 +1,12 @@ +DPDK_2.0 { + global: + + rte_meter_srtcm_color_aware_check; + rte_meter_srtcm_color_blind_check; + rte_meter_srtcm_config; + rte_meter_trtcm_color_aware_check; + rte_meter_trtcm_color_blind_check; + rte_meter_trtcm_config; + + local: *; +}; diff --git a/lib/librte_net/Makefile b/lib/librte_net/Makefile new file mode 100644 index 00000000..ad2e482d --- /dev/null +++ b/lib/librte_net/Makefile @@ -0,0 +1,40 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_NET)-include := rte_ip.h rte_tcp.h rte_udp.h rte_sctp.h rte_icmp.h rte_arp.h + + +include $(RTE_SDK)/mk/rte.install.mk diff --git a/lib/librte_net/rte_arp.h b/lib/librte_net/rte_arp.h new file mode 100644 index 00000000..18364187 --- /dev/null +++ b/lib/librte_net/rte_arp.h @@ -0,0 +1,83 @@ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ARP_H_ +#define _RTE_ARP_H_ + +/** + * @file + * + * ARP-related defines + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ARP header IPv4 payload. + */ +struct arp_ipv4 { + struct ether_addr arp_sha; /**< sender hardware address */ + uint32_t arp_sip; /**< sender IP address */ + struct ether_addr arp_tha; /**< target hardware address */ + uint32_t arp_tip; /**< target IP address */ +} __attribute__((__packed__)); + +/** + * ARP header. + */ +struct arp_hdr { + uint16_t arp_hrd; /* format of hardware address */ +#define ARP_HRD_ETHER 1 /* ARP Ethernet address format */ + + uint16_t arp_pro; /* format of protocol address */ + uint8_t arp_hln; /* length of hardware address */ + uint8_t arp_pln; /* length of protocol address */ + uint16_t arp_op; /* ARP opcode (command) */ +#define ARP_OP_REQUEST 1 /* request to resolve address */ +#define ARP_OP_REPLY 2 /* response to previous request */ +#define ARP_OP_REVREQUEST 3 /* request proto addr given hardware */ +#define ARP_OP_REVREPLY 4 /* response giving protocol address */ +#define ARP_OP_INVREQUEST 8 /* request to identify peer */ +#define ARP_OP_INVREPLY 9 /* response identifying peer */ + + struct arp_ipv4 arp_data; +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ARP_H_ */ diff --git a/lib/librte_net/rte_icmp.h b/lib/librte_net/rte_icmp.h new file mode 100644 index 00000000..8b287f6d --- /dev/null +++ b/lib/librte_net/rte_icmp.h @@ -0,0 +1,101 @@ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_ICMP_H_ +#define _RTE_ICMP_H_ + +/** + * @file + * + * ICMP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * ICMP Header + */ +struct icmp_hdr { + uint8_t icmp_type; /* ICMP packet type. */ + uint8_t icmp_code; /* ICMP packet code. */ + uint16_t icmp_cksum; /* ICMP packet checksum. */ + uint16_t icmp_ident; /* ICMP packet identifier. */ + uint16_t icmp_seq_nb; /* ICMP packet sequence number. */ +} __attribute__((__packed__)); + +/* ICMP packet types */ +#define IP_ICMP_ECHO_REPLY 0 +#define IP_ICMP_ECHO_REQUEST 8 + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_ICMP_H_ */ diff --git a/lib/librte_net/rte_ip.h b/lib/librte_net/rte_ip.h new file mode 100644 index 00000000..5b7554ab --- /dev/null +++ b/lib/librte_net/rte_ip.h @@ -0,0 +1,413 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_IP_H_ +#define _RTE_IP_H_ + +/** + * @file + * + * IP-related defines + */ + +#include +#include + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * IPv4 Header + */ +struct ipv4_hdr { + uint8_t version_ihl; /**< version and header length */ + uint8_t type_of_service; /**< type of service */ + uint16_t total_length; /**< length of packet */ + uint16_t packet_id; /**< packet ID */ + uint16_t fragment_offset; /**< fragmentation offset */ + uint8_t time_to_live; /**< time to live */ + uint8_t next_proto_id; /**< protocol ID */ + uint16_t hdr_checksum; /**< header checksum */ + uint32_t src_addr; /**< source address */ + uint32_t dst_addr; /**< destination address */ +} __attribute__((__packed__)); + +/** Create IPv4 address */ +#define IPv4(a,b,c,d) ((uint32_t)(((a) & 0xff) << 24) | \ + (((b) & 0xff) << 16) | \ + (((c) & 0xff) << 8) | \ + ((d) & 0xff)) + +/** Maximal IPv4 packet length (including a header) */ +#define IPV4_MAX_PKT_LEN 65535 + +/** Internet header length mask for version_ihl field */ +#define IPV4_HDR_IHL_MASK (0x0f) +/** + * Internet header length field multiplier (IHL field specifies overall header + * length in number of 4-byte words) + */ +#define IPV4_IHL_MULTIPLIER (4) + +/* Fragment Offset * Flags. */ +#define IPV4_HDR_DF_SHIFT 14 +#define IPV4_HDR_MF_SHIFT 13 +#define IPV4_HDR_FO_SHIFT 3 + +#define IPV4_HDR_DF_FLAG (1 << IPV4_HDR_DF_SHIFT) +#define IPV4_HDR_MF_FLAG (1 << IPV4_HDR_MF_SHIFT) + +#define IPV4_HDR_OFFSET_MASK ((1 << IPV4_HDR_MF_SHIFT) - 1) + +#define IPV4_HDR_OFFSET_UNITS 8 + +/* + * IPv4 address types + */ +#define IPV4_ANY ((uint32_t)0x00000000) /**< 0.0.0.0 */ +#define IPV4_LOOPBACK ((uint32_t)0x7f000001) /**< 127.0.0.1 */ +#define IPV4_BROADCAST ((uint32_t)0xe0000000) /**< 224.0.0.0 */ +#define IPV4_ALLHOSTS_GROUP ((uint32_t)0xe0000001) /**< 224.0.0.1 */ +#define IPV4_ALLRTRS_GROUP ((uint32_t)0xe0000002) /**< 224.0.0.2 */ +#define IPV4_MAX_LOCAL_GROUP ((uint32_t)0xe00000ff) /**< 224.0.0.255 */ + +/* + * IPv4 Multicast-related macros + */ +#define IPV4_MIN_MCAST IPv4(224, 0, 0, 0) /**< Minimal IPv4-multicast address */ +#define IPV4_MAX_MCAST IPv4(239, 255, 255, 255) /**< Maximum IPv4 multicast address */ + +#define IS_IPV4_MCAST(x) \ + ((x) >= IPV4_MIN_MCAST && (x) <= IPV4_MAX_MCAST) /**< check if IPv4 address is multicast */ + +/** + * @internal Calculate a sum of all words in the buffer. + * Helper routine for the rte_raw_cksum(). + * + * @param buf + * Pointer to the buffer. + * @param len + * Length of the buffer. + * @param sum + * Initial value of the sum. + * @return + * sum += Sum of all words in the buffer. + */ +static inline uint32_t +__rte_raw_cksum(const void *buf, size_t len, uint32_t sum) +{ + /* workaround gcc strict-aliasing warning */ + uintptr_t ptr = (uintptr_t)buf; + typedef uint16_t __attribute__((__may_alias__)) u16_p; + const u16_p *u16 = (const u16_p *)ptr; + + while (len >= (sizeof(*u16) * 4)) { + sum += u16[0]; + sum += u16[1]; + sum += u16[2]; + sum += u16[3]; + len -= sizeof(*u16) * 4; + u16 += 4; + } + while (len >= sizeof(*u16)) { + sum += *u16; + len -= sizeof(*u16); + u16 += 1; + } + + /* if length is in odd bytes */ + if (len == 1) + sum += *((const uint8_t *)u16); + + return sum; +} + +/** + * @internal Reduce a sum to the non-complemented checksum. + * Helper routine for the rte_raw_cksum(). + * + * @param sum + * Value of the sum. + * @return + * The non-complemented checksum. + */ +static inline uint16_t +__rte_raw_cksum_reduce(uint32_t sum) +{ + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); + sum = ((sum & 0xffff0000) >> 16) + (sum & 0xffff); + return (uint16_t)sum; +} + +/** + * Process the non-complemented checksum of a buffer. + * + * @param buf + * Pointer to the buffer. + * @param len + * Length of the buffer. + * @return + * The non-complemented checksum. + */ +static inline uint16_t +rte_raw_cksum(const void *buf, size_t len) +{ + uint32_t sum; + + sum = __rte_raw_cksum(buf, len, 0); + return __rte_raw_cksum_reduce(sum); +} + +/** + * Process the IPv4 checksum of an IPv4 header. + * + * The checksum field must be set to 0 by the caller. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @return + * The complemented checksum to set in the IP packet. + */ +static inline uint16_t +rte_ipv4_cksum(const struct ipv4_hdr *ipv4_hdr) +{ + uint16_t cksum; + cksum = rte_raw_cksum(ipv4_hdr, sizeof(struct ipv4_hdr)); + return (cksum == 0xffff) ? cksum : ~cksum; +} + +/** + * Process the pseudo-header checksum of an IPv4 header. + * + * The checksum field must be set to 0 by the caller. + * + * Depending on the ol_flags, the pseudo-header checksum expected by the + * drivers is not the same. For instance, when TSO is enabled, the IP + * payload length must not be included in the packet. + * + * When ol_flags is 0, it computes the standard pseudo-header checksum. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @param ol_flags + * The ol_flags of the associated mbuf. + * @return + * The non-complemented checksum to set in the L4 header. + */ +static inline uint16_t +rte_ipv4_phdr_cksum(const struct ipv4_hdr *ipv4_hdr, uint64_t ol_flags) +{ + struct ipv4_psd_header { + uint32_t src_addr; /* IP address of source host. */ + uint32_t dst_addr; /* IP address of destination host. */ + uint8_t zero; /* zero. */ + uint8_t proto; /* L4 protocol type. */ + uint16_t len; /* L4 length. */ + } psd_hdr; + + psd_hdr.src_addr = ipv4_hdr->src_addr; + psd_hdr.dst_addr = ipv4_hdr->dst_addr; + psd_hdr.zero = 0; + psd_hdr.proto = ipv4_hdr->next_proto_id; + if (ol_flags & PKT_TX_TCP_SEG) { + psd_hdr.len = 0; + } else { + psd_hdr.len = rte_cpu_to_be_16( + (uint16_t)(rte_be_to_cpu_16(ipv4_hdr->total_length) + - sizeof(struct ipv4_hdr))); + } + return rte_raw_cksum(&psd_hdr, sizeof(psd_hdr)); +} + +/** + * Process the IPv4 UDP or TCP checksum. + * + * The IPv4 header should not contains options. The IP and layer 4 + * checksum must be set to 0 in the packet by the caller. + * + * @param ipv4_hdr + * The pointer to the contiguous IPv4 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header. + * @return + * The complemented checksum to set in the IP packet. + */ +static inline uint16_t +rte_ipv4_udptcp_cksum(const struct ipv4_hdr *ipv4_hdr, const void *l4_hdr) +{ + uint32_t cksum; + uint32_t l4_len; + + l4_len = rte_be_to_cpu_16(ipv4_hdr->total_length) - + sizeof(struct ipv4_hdr); + + cksum = rte_raw_cksum(l4_hdr, l4_len); + cksum += rte_ipv4_phdr_cksum(ipv4_hdr, 0); + + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); + cksum = (~cksum) & 0xffff; + if (cksum == 0) + cksum = 0xffff; + + return cksum; +} + +/** + * IPv6 Header + */ +struct ipv6_hdr { + uint32_t vtc_flow; /**< IP version, traffic class & flow label. */ + uint16_t payload_len; /**< IP packet length - includes sizeof(ip_header). */ + uint8_t proto; /**< Protocol, next header. */ + uint8_t hop_limits; /**< Hop limits. */ + uint8_t src_addr[16]; /**< IP address of source host. */ + uint8_t dst_addr[16]; /**< IP address of destination host(s). */ +} __attribute__((__packed__)); + +/** + * Process the pseudo-header checksum of an IPv6 header. + * + * Depending on the ol_flags, the pseudo-header checksum expected by the + * drivers is not the same. For instance, when TSO is enabled, the IPv6 + * payload length must not be included in the packet. + * + * When ol_flags is 0, it computes the standard pseudo-header checksum. + * + * @param ipv6_hdr + * The pointer to the contiguous IPv6 header. + * @param ol_flags + * The ol_flags of the associated mbuf. + * @return + * The non-complemented checksum to set in the L4 header. + */ +static inline uint16_t +rte_ipv6_phdr_cksum(const struct ipv6_hdr *ipv6_hdr, uint64_t ol_flags) +{ + uint32_t sum; + struct { + uint32_t len; /* L4 length. */ + uint32_t proto; /* L4 protocol - top 3 bytes must be zero */ + } psd_hdr; + + psd_hdr.proto = (ipv6_hdr->proto << 24); + if (ol_flags & PKT_TX_TCP_SEG) { + psd_hdr.len = 0; + } else { + psd_hdr.len = ipv6_hdr->payload_len; + } + + sum = __rte_raw_cksum(ipv6_hdr->src_addr, + sizeof(ipv6_hdr->src_addr) + sizeof(ipv6_hdr->dst_addr), + 0); + sum = __rte_raw_cksum(&psd_hdr, sizeof(psd_hdr), sum); + return __rte_raw_cksum_reduce(sum); +} + +/** + * Process the IPv6 UDP or TCP checksum. + * + * The IPv4 header should not contains options. The layer 4 checksum + * must be set to 0 in the packet by the caller. + * + * @param ipv6_hdr + * The pointer to the contiguous IPv6 header. + * @param l4_hdr + * The pointer to the beginning of the L4 header. + * @return + * The complemented checksum to set in the IP packet. + */ +static inline uint16_t +rte_ipv6_udptcp_cksum(const struct ipv6_hdr *ipv6_hdr, const void *l4_hdr) +{ + uint32_t cksum; + uint32_t l4_len; + + l4_len = rte_be_to_cpu_16(ipv6_hdr->payload_len); + + cksum = rte_raw_cksum(l4_hdr, l4_len); + cksum += rte_ipv6_phdr_cksum(ipv6_hdr, 0); + + cksum = ((cksum & 0xffff0000) >> 16) + (cksum & 0xffff); + cksum = (~cksum) & 0xffff; + if (cksum == 0) + cksum = 0xffff; + + return cksum; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_IP_H_ */ diff --git a/lib/librte_net/rte_sctp.h b/lib/librte_net/rte_sctp.h new file mode 100644 index 00000000..688e126f --- /dev/null +++ b/lib/librte_net/rte_sctp.h @@ -0,0 +1,99 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +/** + * @file + * + * SCTP-related defines + */ + +#ifndef _RTE_SCTP_H_ +#define _RTE_SCTP_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * SCTP Header + */ +struct sctp_hdr { + uint16_t src_port; /**< Source port. */ + uint16_t dst_port; /**< Destin port. */ + uint32_t tag; /**< Validation tag. */ + uint32_t cksum; /**< Checksum. */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_SCTP_H_ */ diff --git a/lib/librte_net/rte_tcp.h b/lib/librte_net/rte_tcp.h new file mode 100644 index 00000000..28b61e6d --- /dev/null +++ b/lib/librte_net/rte_tcp.h @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_TCP_H_ +#define _RTE_TCP_H_ + +/** + * @file + * + * TCP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * TCP Header + */ +struct tcp_hdr { + uint16_t src_port; /**< TCP source port. */ + uint16_t dst_port; /**< TCP destination port. */ + uint32_t sent_seq; /**< TX data sequence number. */ + uint32_t recv_ack; /**< RX data acknowledgement sequence number. */ + uint8_t data_off; /**< Data offset. */ + uint8_t tcp_flags; /**< TCP flags */ + uint16_t rx_win; /**< RX flow control window. */ + uint16_t cksum; /**< TCP checksum. */ + uint16_t tcp_urp; /**< TCP urgent pointer, if any. */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_TCP_H_ */ diff --git a/lib/librte_net/rte_udp.h b/lib/librte_net/rte_udp.h new file mode 100644 index 00000000..bc5be4af --- /dev/null +++ b/lib/librte_net/rte_udp.h @@ -0,0 +1,99 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Copyright (c) 1982, 1986, 1990, 1993 + * The Regents of the University of California. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)in.h 8.3 (Berkeley) 1/3/94 + * $FreeBSD: src/sys/netinet/in.h,v 1.82 2003/10/25 09:37:10 ume Exp $ + */ + +#ifndef _RTE_UDP_H_ +#define _RTE_UDP_H_ + +/** + * @file + * + * UDP-related defines + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * UDP Header + */ +struct udp_hdr { + uint16_t src_port; /**< UDP source port. */ + uint16_t dst_port; /**< UDP destination port. */ + uint16_t dgram_len; /**< UDP datagram length */ + uint16_t dgram_cksum; /**< UDP datagram checksum */ +} __attribute__((__packed__)); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_UDP_H_ */ diff --git a/lib/librte_pipeline/Makefile b/lib/librte_pipeline/Makefile new file mode 100644 index 00000000..822fd41c --- /dev/null +++ b/lib/librte_pipeline/Makefile @@ -0,0 +1,58 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_pipeline.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_pipeline_version.map + +LIBABIVER := 3 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PIPELINE) := rte_pipeline.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_PIPELINE)-include += rte_pipeline.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) := lib/librte_table +DEPDIRS-$(CONFIG_RTE_LIBRTE_PIPELINE) += lib/librte_port + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_pipeline/rte_pipeline.c b/lib/librte_pipeline/rte_pipeline.c new file mode 100644 index 00000000..7f8fbac5 --- /dev/null +++ b/lib/librte_pipeline/rte_pipeline.c @@ -0,0 +1,1649 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_pipeline.h" + +#define RTE_TABLE_INVALID UINT32_MAX + +#ifdef RTE_PIPELINE_STATS_COLLECT + +#define RTE_PIPELINE_STATS_AH_DROP_WRITE(p, mask) \ + ({ (p)->n_pkts_ah_drop = __builtin_popcountll(mask); }) + +#define RTE_PIPELINE_STATS_AH_DROP_READ(p, counter) \ + ({ (counter) += (p)->n_pkts_ah_drop; (p)->n_pkts_ah_drop = 0; }) + +#define RTE_PIPELINE_STATS_TABLE_DROP0(p) \ + ({ (p)->pkts_drop_mask = (p)->action_mask0[RTE_PIPELINE_ACTION_DROP]; }) + +#define RTE_PIPELINE_STATS_TABLE_DROP1(p, counter) \ +({ \ + uint64_t mask = (p)->action_mask0[RTE_PIPELINE_ACTION_DROP]; \ + mask ^= (p)->pkts_drop_mask; \ + (counter) += __builtin_popcountll(mask); \ +}) + +#else + +#define RTE_PIPELINE_STATS_AH_DROP_WRITE(p, mask) +#define RTE_PIPELINE_STATS_AH_DROP_READ(p, counter) +#define RTE_PIPELINE_STATS_TABLE_DROP0(p) +#define RTE_PIPELINE_STATS_TABLE_DROP1(p, counter) + +#endif + +struct rte_port_in { + /* Input parameters */ + struct rte_port_in_ops ops; + rte_pipeline_port_in_action_handler f_action; + void *arg_ah; + uint32_t burst_size; + + /* The table to which this port is connected */ + uint32_t table_id; + + /* Handle to low-level port */ + void *h_port; + + /* List of enabled ports */ + struct rte_port_in *next; + + /* Statistics */ + uint64_t n_pkts_dropped_by_ah; +}; + +struct rte_port_out { + /* Input parameters */ + struct rte_port_out_ops ops; + rte_pipeline_port_out_action_handler f_action; + void *arg_ah; + + /* Handle to low-level port */ + void *h_port; + + /* Statistics */ + uint64_t n_pkts_dropped_by_ah; +}; + +struct rte_table { + /* Input parameters */ + struct rte_table_ops ops; + rte_pipeline_table_action_handler_hit f_action_hit; + rte_pipeline_table_action_handler_miss f_action_miss; + void *arg_ah; + struct rte_pipeline_table_entry *default_entry; + uint32_t entry_size; + + uint32_t table_next_id; + uint32_t table_next_id_valid; + + /* Handle to the low-level table object */ + void *h_table; + + /* Statistics */ + uint64_t n_pkts_dropped_by_lkp_hit_ah; + uint64_t n_pkts_dropped_by_lkp_miss_ah; + uint64_t n_pkts_dropped_lkp_hit; + uint64_t n_pkts_dropped_lkp_miss; +}; + +#define RTE_PIPELINE_MAX_NAME_SZ 124 + +struct rte_pipeline { + /* Input parameters */ + char name[RTE_PIPELINE_MAX_NAME_SZ]; + int socket_id; + uint32_t offset_port_id; + + /* Internal tables */ + struct rte_port_in ports_in[RTE_PIPELINE_PORT_IN_MAX]; + struct rte_port_out ports_out[RTE_PIPELINE_PORT_OUT_MAX]; + struct rte_table tables[RTE_PIPELINE_TABLE_MAX]; + + /* Occupancy of internal tables */ + uint32_t num_ports_in; + uint32_t num_ports_out; + uint32_t num_tables; + + /* List of enabled ports */ + uint64_t enabled_port_in_mask; + struct rte_port_in *port_in_next; + + /* Pipeline run structures */ + struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_pipeline_table_entry *entries[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t action_mask0[RTE_PIPELINE_ACTIONS]; + uint64_t action_mask1[RTE_PIPELINE_ACTIONS]; + uint64_t pkts_mask; + uint64_t n_pkts_ah_drop; + uint64_t pkts_drop_mask; +} __rte_cache_aligned; + +static inline uint32_t +rte_mask_get_next(uint64_t mask, uint32_t pos) +{ + uint64_t mask_rot = (mask << ((63 - pos) & 0x3F)) | + (mask >> ((pos + 1) & 0x3F)); + return (__builtin_ctzll(mask_rot) - (63 - pos)) & 0x3F; +} + +static inline uint32_t +rte_mask_get_prev(uint64_t mask, uint32_t pos) +{ + uint64_t mask_rot = (mask >> (pos & 0x3F)) | + (mask << ((64 - pos) & 0x3F)); + return ((63 - __builtin_clzll(mask_rot)) + pos) & 0x3F; +} + +static void +rte_pipeline_table_free(struct rte_table *table); + +static void +rte_pipeline_port_in_free(struct rte_port_in *port); + +static void +rte_pipeline_port_out_free(struct rte_port_out *port); + +/* + * Pipeline + * + */ +static int +rte_pipeline_check_params(struct rte_pipeline_params *params) +{ + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for parameter params\n", __func__); + return -EINVAL; + } + + /* name */ + if (params->name == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for parameter name\n", __func__); + return -EINVAL; + } + + /* socket */ + if ((params->socket_id < 0) || + (params->socket_id >= RTE_MAX_NUMA_NODES)) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for parameter socket_id\n", + __func__); + return -EINVAL; + } + + return 0; +} + +struct rte_pipeline * +rte_pipeline_create(struct rte_pipeline_params *params) +{ + struct rte_pipeline *p; + int status; + + /* Check input parameters */ + status = rte_pipeline_check_params(params); + if (status != 0) { + RTE_LOG(ERR, PIPELINE, + "%s: Pipeline params check failed (%d)\n", + __func__, status); + return NULL; + } + + /* Allocate memory for the pipeline on requested socket */ + p = rte_zmalloc_socket("PIPELINE", sizeof(struct rte_pipeline), + RTE_CACHE_LINE_SIZE, params->socket_id); + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Pipeline memory allocation failed\n", __func__); + return NULL; + } + + /* Save input parameters */ + snprintf(p->name, RTE_PIPELINE_MAX_NAME_SZ, "%s", params->name); + p->socket_id = params->socket_id; + p->offset_port_id = params->offset_port_id; + + /* Initialize pipeline internal data structure */ + p->num_ports_in = 0; + p->num_ports_out = 0; + p->num_tables = 0; + p->enabled_port_in_mask = 0; + p->port_in_next = NULL; + p->pkts_mask = 0; + p->n_pkts_ah_drop = 0; + + return p; +} + +int +rte_pipeline_free(struct rte_pipeline *p) +{ + uint32_t i; + + /* Check input parameters */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: rte_pipeline parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free input ports */ + for (i = 0; i < p->num_ports_in; i++) { + struct rte_port_in *port = &p->ports_in[i]; + + rte_pipeline_port_in_free(port); + } + + /* Free tables */ + for (i = 0; i < p->num_tables; i++) { + struct rte_table *table = &p->tables[i]; + + rte_pipeline_table_free(table); + } + + /* Free output ports */ + for (i = 0; i < p->num_ports_out; i++) { + struct rte_port_out *port = &p->ports_out[i]; + + rte_pipeline_port_out_free(port); + } + + /* Free pipeline memory */ + rte_free(p); + + return 0; +} + +/* + * Table + * + */ +static int +rte_table_check_params(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id) +{ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter is NULL\n", + __func__); + return -EINVAL; + } + if (table_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: table_id parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops is NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer is NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_lookup == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_lookup function pointer is NULL\n", __func__); + return -EINVAL; + } + + /* De we have room for one more table? */ + if (p->num_tables == RTE_PIPELINE_TABLE_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: Incorrect value for num_tables parameter\n", + __func__); + return -EINVAL; + } + + return 0; +} + +int +rte_pipeline_table_create(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id) +{ + struct rte_table *table; + struct rte_pipeline_table_entry *default_entry; + void *h_table; + uint32_t entry_size, id; + int status; + + /* Check input arguments */ + status = rte_table_check_params(p, params, table_id); + if (status != 0) + return status; + + id = p->num_tables; + table = &p->tables[id]; + + /* Allocate space for the default table entry */ + entry_size = sizeof(struct rte_pipeline_table_entry) + + params->action_data_size; + default_entry = (struct rte_pipeline_table_entry *) rte_zmalloc_socket( + "PIPELINE", entry_size, RTE_CACHE_LINE_SIZE, p->socket_id); + if (default_entry == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: Failed to allocate default entry\n", __func__); + return -EINVAL; + } + + /* Create the table */ + h_table = params->ops->f_create(params->arg_create, p->socket_id, + entry_size); + if (h_table == NULL) { + rte_free(default_entry); + RTE_LOG(ERR, PIPELINE, "%s: Table creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current table to the pipeline */ + p->num_tables++; + *table_id = id; + + /* Save input parameters */ + memcpy(&table->ops, params->ops, sizeof(struct rte_table_ops)); + table->f_action_hit = params->f_action_hit; + table->f_action_miss = params->f_action_miss; + table->arg_ah = params->arg_ah; + table->entry_size = entry_size; + + /* Clear the lookup miss actions (to be set later through API) */ + table->default_entry = default_entry; + table->default_entry->action = RTE_PIPELINE_ACTION_DROP; + + /* Initialize table internal data structure */ + table->h_table = h_table; + table->table_next_id = 0; + table->table_next_id_valid = 0; + + return 0; +} + +void +rte_pipeline_table_free(struct rte_table *table) +{ + if (table->ops.f_free != NULL) + table->ops.f_free(table->h_table); + + rte_free(table->default_entry); +} + +int +rte_pipeline_table_default_entry_add(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *default_entry, + struct rte_pipeline_table_entry **default_entry_ptr) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (default_entry == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: default_entry parameter is NULL\n", __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (default_entry->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; + } + + /* Set the lookup miss actions */ + if ((default_entry->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = default_entry->table_id; + table->table_next_id_valid = 1; + } + + memcpy(table->default_entry, default_entry, table->entry_size); + + *default_entry_ptr = table->default_entry; + return 0; +} + +int +rte_pipeline_table_default_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *entry) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: pipeline parameter is NULL\n", __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + /* Save the current contents of the default entry */ + if (entry) + memcpy(entry, table->default_entry, table->entry_size); + + /* Clear the lookup miss actions */ + memset(table->default_entry, 0, table->entry_size); + table->default_entry->action = RTE_PIPELINE_ACTION_DROP; + + return 0; +} + +int +rte_pipeline_table_entry_add(struct rte_pipeline *p, + uint32_t table_id, + void *key, + struct rte_pipeline_table_entry *entry, + int *key_found, + struct rte_pipeline_table_entry **entry_ptr) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entry == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: entry parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_add == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_add function pointer NULL\n", + __func__); + return -EINVAL; + } + + if ((entry->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (entry->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; + } + + /* Add entry */ + if ((entry->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = entry->table_id; + table->table_next_id_valid = 1; + } + + return (table->ops.f_add)(table->h_table, key, (void *) entry, + key_found, (void **) entry_ptr); +} + +int +rte_pipeline_table_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + void *key, + int *key_found, + struct rte_pipeline_table_entry *entry) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (key == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_delete == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_delete function pointer NULL\n", __func__); + return -EINVAL; + } + + return (table->ops.f_delete)(table->h_table, key, key_found, entry); +} + +int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + struct rte_pipeline_table_entry **entries, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries_ptr) +{ + struct rte_table *table; + uint32_t i; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (keys == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: keys parameter is NULL\n", __func__); + return -EINVAL; + } + + if (entries == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: entries parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_add_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_add_bulk function pointer NULL\n", + __func__); + return -EINVAL; + } + + for (i = 0; i < n_keys; i++) { + if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) && + table->table_next_id_valid && + (entries[i]->table_id != table->table_next_id)) { + RTE_LOG(ERR, PIPELINE, + "%s: Tree-like topologies not allowed\n", __func__); + return -EINVAL; + } + } + + /* Add entry */ + for (i = 0; i < n_keys; i++) { + if ((entries[i]->action == RTE_PIPELINE_ACTION_TABLE) && + (table->table_next_id_valid == 0)) { + table->table_next_id = entries[i]->table_id; + table->table_next_id_valid = 1; + } + } + + return (table->ops.f_add_bulk)(table->h_table, keys, (void **) entries, + n_keys, key_found, (void **) entries_ptr); +} + +int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries) +{ + struct rte_table *table; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (keys == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: key parameter is NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table_id %d out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + + if (table->ops.f_delete_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_delete function pointer NULL\n", __func__); + return -EINVAL; + } + + return (table->ops.f_delete_bulk)(table->h_table, keys, n_keys, key_found, + (void **) entries); +} + +/* + * Port + * + */ +static int +rte_pipeline_port_in_check_params(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id) +{ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__); + return -EINVAL; + } + if (port_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_rx == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: f_rx function pointer NULL\n", + __func__); + return -EINVAL; + } + + /* burst_size */ + if ((params->burst_size == 0) || + (params->burst_size > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PIPELINE, "%s: invalid value for burst_size\n", + __func__); + return -EINVAL; + } + + /* Do we have room for one more port? */ + if (p->num_ports_in == RTE_PIPELINE_PORT_IN_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: invalid value for num_ports_in\n", __func__); + return -EINVAL; + } + + return 0; +} + +static int +rte_pipeline_port_out_check_params(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id) +{ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params parameter NULL\n", __func__); + return -EINVAL; + } + + if (port_id == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: port_id parameter NULL\n", + __func__); + return -EINVAL; + } + + /* ops */ + if (params->ops == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: params->ops parameter NULL\n", + __func__); + return -EINVAL; + } + + if (params->ops->f_create == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_create function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_tx == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_tx function pointer NULL\n", __func__); + return -EINVAL; + } + + if (params->ops->f_tx_bulk == NULL) { + RTE_LOG(ERR, PIPELINE, + "%s: f_tx_bulk function pointer NULL\n", __func__); + return -EINVAL; + } + + /* Do we have room for one more port? */ + if (p->num_ports_out == RTE_PIPELINE_PORT_OUT_MAX) { + RTE_LOG(ERR, PIPELINE, + "%s: invalid value for num_ports_out\n", __func__); + return -EINVAL; + } + + return 0; +} + +int +rte_pipeline_port_in_create(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id) +{ + struct rte_port_in *port; + void *h_port; + uint32_t id; + int status; + + /* Check input arguments */ + status = rte_pipeline_port_in_check_params(p, params, port_id); + if (status != 0) + return status; + + id = p->num_ports_in; + port = &p->ports_in[id]; + + /* Create the port */ + h_port = params->ops->f_create(params->arg_create, p->socket_id); + if (h_port == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current table to the pipeline */ + p->num_ports_in++; + *port_id = id; + + /* Save input parameters */ + memcpy(&port->ops, params->ops, sizeof(struct rte_port_in_ops)); + port->f_action = params->f_action; + port->arg_ah = params->arg_ah; + port->burst_size = params->burst_size; + + /* Initialize port internal data structure */ + port->table_id = RTE_TABLE_INVALID; + port->h_port = h_port; + port->next = NULL; + + return 0; +} + +void +rte_pipeline_port_in_free(struct rte_port_in *port) +{ + if (port->ops.f_free != NULL) + port->ops.f_free(port->h_port); +} + +int +rte_pipeline_port_out_create(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id) +{ + struct rte_port_out *port; + void *h_port; + uint32_t id; + int status; + + /* Check input arguments */ + status = rte_pipeline_port_out_check_params(p, params, port_id); + if (status != 0) + return status; + + id = p->num_ports_out; + port = &p->ports_out[id]; + + /* Create the port */ + h_port = params->ops->f_create(params->arg_create, p->socket_id); + if (h_port == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: Port creation failed\n", __func__); + return -EINVAL; + } + + /* Commit current table to the pipeline */ + p->num_ports_out++; + *port_id = id; + + /* Save input parameters */ + memcpy(&port->ops, params->ops, sizeof(struct rte_port_out_ops)); + port->f_action = params->f_action; + port->arg_ah = params->arg_ah; + + /* Initialize port internal data structure */ + port->h_port = h_port; + + return 0; +} + +void +rte_pipeline_port_out_free(struct rte_port_out *port) +{ + if (port->ops.f_free != NULL) + port->ops.f_free(port->h_port); +} + +int +rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p, + uint32_t port_id, + uint32_t table_id) +{ + struct rte_port_in *port; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: Table ID %u is out of range\n", + __func__, table_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + port->table_id = table_id; + + return 0; +} + +int +rte_pipeline_port_in_enable(struct rte_pipeline *p, uint32_t port_id) +{ + struct rte_port_in *port, *port_prev, *port_next; + uint64_t port_mask; + uint32_t port_prev_id, port_next_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + + /* Return if current input port is already enabled */ + port_mask = 1LLU << port_id; + if (p->enabled_port_in_mask & port_mask) + return 0; + + p->enabled_port_in_mask |= port_mask; + + /* Add current input port to the pipeline chain of enabled ports */ + port_prev_id = rte_mask_get_prev(p->enabled_port_in_mask, port_id); + port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id); + + port_prev = &p->ports_in[port_prev_id]; + port_next = &p->ports_in[port_next_id]; + + port_prev->next = port; + port->next = port_next; + + /* Check if list of enabled ports was previously empty */ + if (p->enabled_port_in_mask == port_mask) + p->port_in_next = port; + + return 0; +} + +int +rte_pipeline_port_in_disable(struct rte_pipeline *p, uint32_t port_id) +{ + struct rte_port_in *port, *port_prev, *port_next; + uint64_t port_mask; + uint32_t port_prev_id, port_next_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + + /* Return if current input port is already disabled */ + port_mask = 1LLU << port_id; + if ((p->enabled_port_in_mask & port_mask) == 0) + return 0; + + p->enabled_port_in_mask &= ~port_mask; + + /* Return if no other enabled ports */ + if (p->enabled_port_in_mask == 0) { + p->port_in_next = NULL; + + return 0; + } + + /* Add current input port to the pipeline chain of enabled ports */ + port_prev_id = rte_mask_get_prev(p->enabled_port_in_mask, port_id); + port_next_id = rte_mask_get_next(p->enabled_port_in_mask, port_id); + + port_prev = &p->ports_in[port_prev_id]; + port_next = &p->ports_in[port_next_id]; + + port_prev->next = port_next; + + /* Check if the port which has just been disabled is next to serve */ + if (port == p->port_in_next) + p->port_in_next = port_next; + + return 0; +} + +/* + * Pipeline run-time + * + */ +int +rte_pipeline_check(struct rte_pipeline *p) +{ + uint32_t port_in_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + /* Check that pipeline has at least one input port, one table and one + output port */ + if (p->num_ports_in == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 input port\n", + __func__); + return -EINVAL; + } + if (p->num_tables == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 table\n", + __func__); + return -EINVAL; + } + if (p->num_ports_out == 0) { + RTE_LOG(ERR, PIPELINE, "%s: must have at least 1 output port\n", + __func__); + return -EINVAL; + } + + /* Check that all input ports are connected */ + for (port_in_id = 0; port_in_id < p->num_ports_in; port_in_id++) { + struct rte_port_in *port_in = &p->ports_in[port_in_id]; + + if (port_in->table_id == RTE_TABLE_INVALID) { + RTE_LOG(ERR, PIPELINE, + "%s: Port IN ID %u is not connected\n", + __func__, port_in_id); + return -EINVAL; + } + } + + return 0; +} + +static inline void +rte_pipeline_compute_masks(struct rte_pipeline *p, uint64_t pkts_mask) +{ + p->action_mask1[RTE_PIPELINE_ACTION_DROP] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_PORT] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_PORT_META] = 0; + p->action_mask1[RTE_PIPELINE_ACTION_TABLE] = 0; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + uint64_t pkt_mask = 1LLU << i; + uint32_t pos = p->entries[i]->action; + + p->action_mask1[pos] |= pkt_mask; + } + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + uint32_t pos; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pos = p->entries[i]->action; + p->action_mask1[pos] |= pkt_mask; + } + } +} + +static inline void +rte_pipeline_action_handler_port_bulk(struct rte_pipeline *p, + uint64_t pkts_mask, uint32_t port_id) +{ + struct rte_port_out *port_out = &p->ports_out[port_id]; + + p->pkts_mask = pkts_mask; + + /* Output port user actions */ + if (port_out->f_action != NULL) { + port_out->f_action(p, p->pkts, pkts_mask, port_out->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_out->n_pkts_dropped_by_ah); + } + + /* Output port TX */ + if (p->pkts_mask != 0) + port_out->ops.f_tx_bulk(port_out->h_port, + p->pkts, + p->pkts_mask); +} + +static inline void +rte_pipeline_action_handler_port(struct rte_pipeline *p, uint64_t pkts_mask) +{ + p->pkts_mask = pkts_mask; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = p->pkts[i]; + uint32_t port_out_id = p->entries[i]->port_id; + struct rte_port_out *port_out = + &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + uint64_t pkt_mask = 1LLU << i; + + port_out->f_action(p, + p->pkts, + pkt_mask, + port_out->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_out->n_pkts_dropped_by_ah); + + /* Output port TX */ + if (pkt_mask & p->pkts_mask) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + struct rte_mbuf *pkt; + struct rte_port_out *port_out; + uint32_t port_out_id; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pkt = p->pkts[i]; + port_out_id = p->entries[i]->port_id; + port_out = &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + port_out->f_action(p, + p->pkts, + pkt_mask, + port_out->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_out->n_pkts_dropped_by_ah); + + /* Output port TX */ + if (pkt_mask & p->pkts_mask) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } +} + +static inline void +rte_pipeline_action_handler_port_meta(struct rte_pipeline *p, + uint64_t pkts_mask) +{ + p->pkts_mask = pkts_mask; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = p->pkts[i]; + uint32_t port_out_id = + RTE_MBUF_METADATA_UINT32(pkt, + p->offset_port_id); + struct rte_port_out *port_out = &p->ports_out[ + port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + uint64_t pkt_mask = 1LLU << i; + + port_out->f_action(p, + p->pkts, + pkt_mask, + port_out->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_out->n_pkts_dropped_by_ah); + + /* Output port TX */ + if (pkt_mask & p->pkts_mask) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + struct rte_mbuf *pkt; + struct rte_port_out *port_out; + uint32_t port_out_id; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + pkt = p->pkts[i]; + port_out_id = RTE_MBUF_METADATA_UINT32(pkt, + p->offset_port_id); + port_out = &p->ports_out[port_out_id]; + + /* Output port user actions */ + if (port_out->f_action == NULL) /* Output port TX */ + port_out->ops.f_tx(port_out->h_port, pkt); + else { + port_out->f_action(p, + p->pkts, + pkt_mask, + port_out->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_out->n_pkts_dropped_by_ah); + + /* Output port TX */ + if (pkt_mask & p->pkts_mask) + port_out->ops.f_tx(port_out->h_port, + pkt); + } + } + } +} + +static inline void +rte_pipeline_action_handler_drop(struct rte_pipeline *p, uint64_t pkts_mask) +{ + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) + rte_pktmbuf_free(p->pkts[i]); + } else { + uint32_t i; + + for (i = 0; i < RTE_PORT_IN_BURST_SIZE_MAX; i++) { + uint64_t pkt_mask = 1LLU << i; + + if ((pkt_mask & pkts_mask) == 0) + continue; + + rte_pktmbuf_free(p->pkts[i]); + } + } +} + +int +rte_pipeline_run(struct rte_pipeline *p) +{ + struct rte_port_in *port_in = p->port_in_next; + uint32_t n_pkts, table_id; + + if (port_in == NULL) + return 0; + + /* Input port RX */ + n_pkts = port_in->ops.f_rx(port_in->h_port, p->pkts, + port_in->burst_size); + if (n_pkts == 0) { + p->port_in_next = port_in->next; + return 0; + } + + p->pkts_mask = RTE_LEN2MASK(n_pkts, uint64_t); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_PORT] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] = 0; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0; + + /* Input port user actions */ + if (port_in->f_action != NULL) { + port_in->f_action(p, p->pkts, n_pkts, port_in->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + port_in->n_pkts_dropped_by_ah); + } + + /* Table */ + for (table_id = port_in->table_id; p->pkts_mask != 0; ) { + struct rte_table *table; + uint64_t lookup_hit_mask, lookup_miss_mask; + + /* Lookup */ + table = &p->tables[table_id]; + table->ops.f_lookup(table->h_table, p->pkts, p->pkts_mask, + &lookup_hit_mask, (void **) p->entries); + lookup_miss_mask = p->pkts_mask & (~lookup_hit_mask); + + /* Lookup miss */ + if (lookup_miss_mask != 0) { + struct rte_pipeline_table_entry *default_entry = + table->default_entry; + + p->pkts_mask = lookup_miss_mask; + + /* Table user actions */ + if (table->f_action_miss != NULL) { + table->f_action_miss(p, + p->pkts, + lookup_miss_mask, + default_entry, + table->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + table->n_pkts_dropped_by_lkp_miss_ah); + } + + /* Table reserved actions */ + if ((default_entry->action == RTE_PIPELINE_ACTION_PORT) && + (p->pkts_mask != 0)) + rte_pipeline_action_handler_port_bulk(p, + p->pkts_mask, + default_entry->port_id); + else { + uint32_t pos = default_entry->action; + + RTE_PIPELINE_STATS_TABLE_DROP0(p); + + p->action_mask0[pos] |= p->pkts_mask; + + RTE_PIPELINE_STATS_TABLE_DROP1(p, + table->n_pkts_dropped_lkp_miss); + } + } + + /* Lookup hit */ + if (lookup_hit_mask != 0) { + p->pkts_mask = lookup_hit_mask; + + /* Table user actions */ + if (table->f_action_hit != NULL) { + table->f_action_hit(p, + p->pkts, + lookup_hit_mask, + p->entries, + table->arg_ah); + + RTE_PIPELINE_STATS_AH_DROP_READ(p, + table->n_pkts_dropped_by_lkp_hit_ah); + } + + /* Table reserved actions */ + RTE_PIPELINE_STATS_TABLE_DROP0(p); + rte_pipeline_compute_masks(p, p->pkts_mask); + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_DROP]; + p->action_mask0[RTE_PIPELINE_ACTION_PORT] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_PORT]; + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_PORT_META]; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] |= + p->action_mask1[ + RTE_PIPELINE_ACTION_TABLE]; + + RTE_PIPELINE_STATS_TABLE_DROP1(p, + table->n_pkts_dropped_lkp_hit); + } + + /* Prepare for next iteration */ + p->pkts_mask = p->action_mask0[RTE_PIPELINE_ACTION_TABLE]; + table_id = table->table_next_id; + p->action_mask0[RTE_PIPELINE_ACTION_TABLE] = 0; + } + + /* Table reserved action PORT */ + rte_pipeline_action_handler_port(p, + p->action_mask0[RTE_PIPELINE_ACTION_PORT]); + + /* Table reserved action PORT META */ + rte_pipeline_action_handler_port_meta(p, + p->action_mask0[RTE_PIPELINE_ACTION_PORT_META]); + + /* Table reserved action DROP */ + rte_pipeline_action_handler_drop(p, + p->action_mask0[RTE_PIPELINE_ACTION_DROP]); + + /* Pick candidate for next port IN to serve */ + p->port_in_next = port_in->next; + + return (int) n_pkts; +} + +int +rte_pipeline_flush(struct rte_pipeline *p) +{ + uint32_t port_id; + + /* Check input arguments */ + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + for (port_id = 0; port_id < p->num_ports_out; port_id++) { + struct rte_port_out *port = &p->ports_out[port_id]; + + if (port->ops.f_flush != NULL) + port->ops.f_flush(port->h_port); + } + + return 0; +} + +int +rte_pipeline_port_out_packet_insert(struct rte_pipeline *p, + uint32_t port_id, struct rte_mbuf *pkt) +{ + struct rte_port_out *port_out = &p->ports_out[port_id]; + + port_out->ops.f_tx(port_out->h_port, pkt); /* Output port TX */ + + return 0; +} + +int rte_pipeline_ah_packet_hijack(struct rte_pipeline *p, + uint64_t pkts_mask) +{ + pkts_mask &= p->pkts_mask; + p->pkts_mask &= ~pkts_mask; + + return 0; +} + +int rte_pipeline_ah_packet_drop(struct rte_pipeline *p, + uint64_t pkts_mask) +{ + pkts_mask &= p->pkts_mask; + p->pkts_mask &= ~pkts_mask; + p->action_mask0[RTE_PIPELINE_ACTION_DROP] |= pkts_mask; + + RTE_PIPELINE_STATS_AH_DROP_WRITE(p, pkts_mask); + return 0; +} + +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_in_stats *stats, int clear) +{ + struct rte_port_in *port; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_in) { + RTE_LOG(ERR, PIPELINE, + "%s: port IN ID %u is out of range\n", + __func__, port_id); + return -EINVAL; + } + + port = &p->ports_in[port_id]; + + if (port->ops.f_stats != NULL) { + retval = port->ops.f_stats(port->h_port, &stats->stats, clear); + if (retval) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if (stats != NULL) + stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah; + + if (clear != 0) + port->n_pkts_dropped_by_ah = 0; + + return 0; +} + +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_out_stats *stats, int clear) +{ + struct rte_port_out *port; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", __func__); + return -EINVAL; + } + + if (port_id >= p->num_ports_out) { + RTE_LOG(ERR, PIPELINE, + "%s: port OUT ID %u is out of range\n", __func__, port_id); + return -EINVAL; + } + + port = &p->ports_out[port_id]; + if (port->ops.f_stats != NULL) { + retval = port->ops.f_stats(port->h_port, &stats->stats, clear); + if (retval != 0) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if (stats != NULL) + stats->n_pkts_dropped_by_ah = port->n_pkts_dropped_by_ah; + + if (clear != 0) + port->n_pkts_dropped_by_ah = 0; + + return 0; +} + +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id, + struct rte_pipeline_table_stats *stats, int clear) +{ + struct rte_table *table; + int retval; + + if (p == NULL) { + RTE_LOG(ERR, PIPELINE, "%s: pipeline parameter NULL\n", + __func__); + return -EINVAL; + } + + if (table_id >= p->num_tables) { + RTE_LOG(ERR, PIPELINE, + "%s: table %u is out of range\n", __func__, table_id); + return -EINVAL; + } + + table = &p->tables[table_id]; + if (table->ops.f_stats != NULL) { + retval = table->ops.f_stats(table->h_table, &stats->stats, clear); + if (retval != 0) + return retval; + } else if (stats != NULL) + memset(&stats->stats, 0, sizeof(stats->stats)); + + if (stats != NULL) { + stats->n_pkts_dropped_by_lkp_hit_ah = + table->n_pkts_dropped_by_lkp_hit_ah; + stats->n_pkts_dropped_by_lkp_miss_ah = + table->n_pkts_dropped_by_lkp_miss_ah; + stats->n_pkts_dropped_lkp_hit = table->n_pkts_dropped_lkp_hit; + stats->n_pkts_dropped_lkp_miss = table->n_pkts_dropped_lkp_miss; + } + + if (clear != 0) { + table->n_pkts_dropped_by_lkp_hit_ah = 0; + table->n_pkts_dropped_by_lkp_miss_ah = 0; + table->n_pkts_dropped_lkp_hit = 0; + table->n_pkts_dropped_lkp_miss = 0; + } + + return 0; +} diff --git a/lib/librte_pipeline/rte_pipeline.h b/lib/librte_pipeline/rte_pipeline.h new file mode 100644 index 00000000..84d18025 --- /dev/null +++ b/lib/librte_pipeline/rte_pipeline.h @@ -0,0 +1,875 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PIPELINE_H__ +#define __INCLUDE_RTE_PIPELINE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Pipeline + * + * This tool is part of the DPDK Packet Framework tool suite and provides + * a standard methodology (logically similar to OpenFlow) for rapid development + * of complex packet processing pipelines out of ports, tables and actions. + * + * Basic operation. A pipeline is constructed by connecting its input + * ports to its output ports through a chain of lookup tables. As result of + * lookup operation into the current table, one of the table entries (or the + * default table entry, in case of lookup miss) is identified to provide the + * actions to be executed on the current packet and the associated action + * meta-data. The behavior of user actions is defined through the configurable + * table action handler, while the reserved actions define the next hop for the + * current packet (either another table, an output port or packet drop) and are + * handled transparently by the framework. + * + * Initialization and run-time flows. Once all the pipeline elements + * (input ports, tables, output ports) have been created, input ports connected + * to tables, table action handlers configured, tables populated with the + * initial set of entries (actions and action meta-data) and input ports + * enabled, the pipeline runs automatically, pushing packets from input ports + * to tables and output ports. At each table, the identified user actions are + * being executed, resulting in action meta-data (stored in the table entry) + * and packet meta-data (stored with the packet descriptor) being updated. The + * pipeline tables can have further updates and input ports can be disabled or + * enabled later on as required. + * + * Multi-core scaling. Typically, each CPU core will run its own + * pipeline instance. Complex application-level pipelines can be implemented by + * interconnecting multiple CPU core-level pipelines in tree-like topologies, + * as the same port devices (e.g. SW rings) can serve as output ports for the + * pipeline running on CPU core A, as well as input ports for the pipeline + * running on CPU core B. This approach enables the application development + * using the pipeline (CPU cores connected serially), cluster/run-to-completion + * (CPU cores connected in parallel) or mixed (pipeline of CPU core clusters) + * programming models. + * + * Thread safety. It is possible to have multiple pipelines running on + * the same CPU core, but it is not allowed (for thread safety reasons) to have + * multiple CPU cores running the same pipeline instance. + * + ***/ + +#include + +#include +#include + +struct rte_mbuf; + +/* + * Pipeline + * + */ +/** Opaque data type for pipeline */ +struct rte_pipeline; + +/** Parameters for pipeline creation */ +struct rte_pipeline_params { + /** Pipeline name */ + const char *name; + + /** CPU socket ID where memory for the pipeline and its elements (ports + and tables) should be allocated */ + int socket_id; + + /** Offset within packet meta-data to port_id to be used by action + "Send packet to output port read from packet meta-data". Has to be + 4-byte aligned. */ + uint32_t offset_port_id; +}; + +/** Pipeline port in stats. */ +struct rte_pipeline_port_in_stats { + /** Port in stats. */ + struct rte_port_in_stats stats; + + /** Number of packets dropped by action handler. */ + uint64_t n_pkts_dropped_by_ah; + +}; + +/** Pipeline port out stats. */ +struct rte_pipeline_port_out_stats { + /** Port out stats. */ + struct rte_port_out_stats stats; + + /** Number of packets dropped by action handler. */ + uint64_t n_pkts_dropped_by_ah; +}; + +/** Pipeline table stats. */ +struct rte_pipeline_table_stats { + /** Table stats. */ + struct rte_table_stats stats; + + /** Number of packets dropped by lookup hit action handler. */ + uint64_t n_pkts_dropped_by_lkp_hit_ah; + + /** Number of packets dropped by lookup miss action handler. */ + uint64_t n_pkts_dropped_by_lkp_miss_ah; + + /** Number of packets dropped by pipeline in behalf of this + * table based on action specified in table entry. */ + uint64_t n_pkts_dropped_lkp_hit; + + /** Number of packets dropped by pipeline in behalf of this + * table based on action specified in table entry. */ + uint64_t n_pkts_dropped_lkp_miss; +}; + +/** + * Pipeline create + * + * @param params + * Parameters for pipeline creation + * @return + * Handle to pipeline instance on success or NULL otherwise + */ +struct rte_pipeline *rte_pipeline_create(struct rte_pipeline_params *params); + +/** + * Pipeline free + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_free(struct rte_pipeline *p); + +/** + * Pipeline consistency check + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_check(struct rte_pipeline *p); + +/** + * Pipeline run + * + * @param p + * Handle to pipeline instance + * @return + * Number of packets read and processed + */ +int rte_pipeline_run(struct rte_pipeline *p); + +/** + * Pipeline flush + * + * @param p + * Handle to pipeline instance + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_flush(struct rte_pipeline *p); + +/* + * Actions + * + */ +/** Reserved actions */ +enum rte_pipeline_action { + /** Drop the packet */ + RTE_PIPELINE_ACTION_DROP = 0, + + /** Send packet to output port */ + RTE_PIPELINE_ACTION_PORT, + + /** Send packet to output port read from packet meta-data */ + RTE_PIPELINE_ACTION_PORT_META, + + /** Send packet to table */ + RTE_PIPELINE_ACTION_TABLE, + + /** Number of reserved actions */ + RTE_PIPELINE_ACTIONS +}; + +/* + * Table + * + */ +/** Maximum number of tables allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_TABLE_MAX 64 + +/** + * Head format for the table entry of any pipeline table. For any given + * pipeline table, all table entries should have the same size and format. For + * any given pipeline table, the table entry has to start with a head of this + * structure, which contains the reserved actions and their associated + * meta-data, and then optionally continues with user actions and their + * associated meta-data. As all the currently defined reserved actions are + * mutually exclusive, only one reserved action can be set per table entry. + */ +struct rte_pipeline_table_entry { + /** Reserved action */ + enum rte_pipeline_action action; + + union { + /** Output port ID (meta-data for "Send packet to output port" + action) */ + uint32_t port_id; + /** Table ID (meta-data for "Send packet to table" action) */ + uint32_t table_id; + }; + /** Start of table entry area for user defined actions and meta-data */ + uint8_t action_data[0]; +}; + +/** + * Pipeline table action handler on lookup hit + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param p + * Handle to pipeline instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet and element n of entries array is pointing to a valid table + * entry associated with the packet, with the association typically done by + * the table lookup operation. Otherwise, element n of pkts array and element + * n of entries array will not be accessed. + * @param entries + * Set of table entries specified as array of up to 64 pointers to struct + * rte_pipeline_table_entry + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_table_action_handler_hit)( + struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + struct rte_pipeline_table_entry **entries, + void *arg); + +/** + * Pipeline table action handler on lookup miss + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param p + * Handle to pipeline instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @param entry + * Single table entry associated with all the valid packets from the input + * burst, specified as pointer to struct rte_pipeline_table_entry. + * This entry is the pipeline table default entry that is associated by the + * table lookup operation with the input packets that have resulted in lookup + * miss. + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_table_action_handler_miss)( + struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + struct rte_pipeline_table_entry *entry, + void *arg); + +/** Parameters for pipeline table creation. Action handlers have to be either + both enabled or both disabled (they can be disabled by setting them to + NULL). */ +struct rte_pipeline_table_params { + /** Table operations (specific to each table type) */ + struct rte_table_ops *ops; + /** Opaque param to be passed to the table create operation when + invoked */ + void *arg_create; + /** Callback function to execute the user actions on input packets in + case of lookup hit */ + rte_pipeline_table_action_handler_hit f_action_hit; + /** Callback function to execute the user actions on input packets in + case of lookup miss */ + rte_pipeline_table_action_handler_miss f_action_miss; + + /** Opaque parameter to be passed to lookup hit and/or lookup miss + action handlers when invoked */ + void *arg_ah; + /** Memory size to be reserved per table entry for storing the user + actions and their meta-data */ + uint32_t action_data_size; +}; + +/** + * Pipeline table create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline table creation + * @param table_id + * Table ID. Valid only within the scope of table IDs of the current + * pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_create(struct rte_pipeline *p, + struct rte_pipeline_table_params *params, + uint32_t *table_id); + +/** + * Pipeline table default entry add + * + * The contents of the table default entry is updated with the provided actions + * and meta-data. When the default entry is not configured (by using this + * function), the built-in default entry has the action "Drop" and meta-data + * set to all-zeros. + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param default_entry + * New contents for the table default entry + * @param default_entry_ptr + * On successful invocation, pointer to the default table entry which can be + * used for further read-write accesses to this table entry. This pointer + * is valid until the default entry is deleted or re-added. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_default_entry_add(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *default_entry, + struct rte_pipeline_table_entry **default_entry_ptr); + +/** + * Pipeline table default entry delete + * + * The new contents of the table default entry is set to reserved action "Drop + * the packet" with meta-data cleared (i.e. set to all-zeros). + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param entry + * On successful invocation, when entry points to a valid buffer, the + * previous contents of the table default entry (as it was just before the + * delete operation) is copied to this buffer + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_default_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + struct rte_pipeline_table_entry *entry); + +/** + * Pipeline table entry add + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param key + * Table entry key + * @param entry + * New contents for the table entry identified by key + * @param key_found + * On successful invocation, set to TRUE (value different than 0) if key was + * already present in the table before the add operation and to FALSE (value + * 0) if not + * @param entry_ptr + * On successful invocation, pointer to the table entry associated with key. + * This can be used for further read-write accesses to this table entry and + * is valid until the key is deleted from the table or re-added (usually for + * associating different actions and/or action meta-data to the current key) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_add(struct rte_pipeline *p, + uint32_t table_id, + void *key, + struct rte_pipeline_table_entry *entry, + int *key_found, + struct rte_pipeline_table_entry **entry_ptr); + +/** + * Pipeline table entry delete + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param key + * Table entry key + * @param key_found + * On successful invocation, set to TRUE (value different than 0) if key was + * found in the table before the delete operation and to FALSE (value 0) if + * not + * @param entry + * On successful invocation, when key is found in the table and entry points + * to a valid buffer, the table entry contents (as it was before the delete + * was performed) is copied to this buffer + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_delete(struct rte_pipeline *p, + uint32_t table_id, + void *key, + int *key_found, + struct rte_pipeline_table_entry *entry); + +/** + * Pipeline table entry add bulk + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param keys + * Array containing table entry keys + * @param entries + * Array containung new contents for every table entry identified by key + * @param n_keys + * Number of keys to add + * @param key_found + * On successful invocation, key_found for every item in the array is set to + * TRUE (value different than 0) if key was already present in the table + * before the add operation and to FALSE (value 0) if not + * @param entries_ptr + * On successful invocation, array *entries_ptr stores pointer to every table + * entry associated with key. This can be used for further read-write accesses + * to this table entry and is valid until the key is deleted from the table or + * re-added (usually for associating different actions and/or action meta-data + * to the current key) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_add_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + struct rte_pipeline_table_entry **entries, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries_ptr); + +/** + * Pipeline table entry delete bulk + * + * @param p + * Handle to pipeline instance + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @param keys + * Array containing table entry keys + * @param n_keys + * Number of keys to delete + * @param key_found + * On successful invocation, key_found for every item in the array is set to + * TRUE (value different than 0) if key was found in the table before the + * delete operation and to FALSE (value 0) if not + * @param entries + * If entries pointer is NULL, this pointer is ignored for every entry found. + * Else, after successful invocation, if specific key is found in the table + * and entry points to a valid buffer, the table entry contents (as it was + * before the delete was performed) is copied to this buffer. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_entry_delete_bulk(struct rte_pipeline *p, + uint32_t table_id, + void **keys, + uint32_t n_keys, + int *key_found, + struct rte_pipeline_table_entry **entries); + +/** + * Read pipeline table stats. + * + * This function reads table statistics identified by *table_id* of given + * pipeline *p*. + * + * @param p + * Handle to pipeline instance. + * @param table_id + * Port ID what stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_table_stats_read(struct rte_pipeline *p, uint32_t table_id, + struct rte_pipeline_table_stats *stats, int clear); + +/* + * Port IN + * + */ +/** Maximum number of input ports allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_PORT_IN_MAX 64 + +/** + * Pipeline input port action handler + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param p + * Handle to pipeline instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param n + * Number of packets in the input burst. This parameter specifies that + * elements 0 to (n-1) of pkts array are valid. + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_port_in_action_handler)( + struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint32_t n, + void *arg); + +/** Parameters for pipeline input port creation */ +struct rte_pipeline_port_in_params { + /** Input port operations (specific to each table type) */ + struct rte_port_in_ops *ops; + /** Opaque parameter to be passed to create operation when invoked */ + void *arg_create; + + /** Callback function to execute the user actions on input packets. + Disabled if set to NULL. */ + rte_pipeline_port_in_action_handler f_action; + /** Opaque parameter to be passed to the action handler when invoked */ + void *arg_ah; + + /** Recommended burst size for the RX operation(in number of pkts) */ + uint32_t burst_size; +}; + +/** + * Pipeline input port create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline input port creation + * @param port_id + * Input port ID. Valid only within the scope of input port IDs of the + * current pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_create(struct rte_pipeline *p, + struct rte_pipeline_port_in_params *params, + uint32_t *port_id); + +/** + * Pipeline input port connect to table + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @param table_id + * Table ID (returned by previous invocation of pipeline table create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_connect_to_table(struct rte_pipeline *p, + uint32_t port_id, + uint32_t table_id); + +/** + * Pipeline input port enable + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_enable(struct rte_pipeline *p, + uint32_t port_id); + +/** + * Pipeline input port disable + * + * @param p + * Handle to pipeline instance + * @param port_id + * Port ID (returned by previous invocation of pipeline input port create) + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_disable(struct rte_pipeline *p, + uint32_t port_id); + +/** + * Read pipeline port in stats. + * + * This function reads port in statistics identified by *port_id* of given + * pipeline *p*. + * + * @param p + * Handle to pipeline instance. + * @param port_id + * Port ID what stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_in_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_in_stats *stats, int clear); + +/* + * Port OUT + * + */ +/** Maximum number of output ports allowed for any given pipeline instance. The + value of this parameter cannot be changed. */ +#define RTE_PIPELINE_PORT_OUT_MAX 64 + +/** + * Pipeline output port action handler + * + * The action handler can decide to drop packets by resetting the associated + * packet bit in the pkts_mask parameter. In this case, the action handler is + * required not to free the packet buffer, which will be freed eventually by + * the pipeline. + * + * @param p + * Handle to pipeline instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @param arg + * Opaque parameter registered by the user at the pipeline table creation + * time + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_pipeline_port_out_action_handler)( + struct rte_pipeline *p, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + void *arg); + +/** Parameters for pipeline output port creation. The action handlers have to +be either both enabled or both disabled (by setting them to NULL). When +enabled, the pipeline selects between them at different moments, based on the +number of packets that have to be sent to the same output port. */ +struct rte_pipeline_port_out_params { + /** Output port operations (specific to each table type) */ + struct rte_port_out_ops *ops; + /** Opaque parameter to be passed to create operation when invoked */ + void *arg_create; + + /** Callback function executing the user actions on bust of input + packets */ + rte_pipeline_port_out_action_handler f_action; + /** Opaque parameter to be passed to the action handler when invoked */ + void *arg_ah; +}; + +/** + * Pipeline output port create + * + * @param p + * Handle to pipeline instance + * @param params + * Parameters for pipeline output port creation + * @param port_id + * Output port ID. Valid only within the scope of output port IDs of the + * current pipeline. Only returned after a successful invocation. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_create(struct rte_pipeline *p, + struct rte_pipeline_port_out_params *params, + uint32_t *port_id); + +/** + * Read pipeline port out stats. + * + * This function reads port out statistics identified by *port_id* of given + * pipeline *p*. + * + * @param p + * Handle to pipeline instance. + * @param port_id + * Port ID what stats will be returned. + * @param stats + * Statistics buffer. + * @param clear + * If not 0 clear stats after reading. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_stats_read(struct rte_pipeline *p, uint32_t port_id, + struct rte_pipeline_port_out_stats *stats, int clear); + +/* + * Functions to be called as part of the port IN/OUT or table action handlers + * + */ +/** + * Action handler packet insert to output port + * + * This function can be called by any input/output port or table action handler + * to send a packet out through one of the pipeline output ports. This packet is + * generated by the action handler, i.e. this packet is not part of the burst of + * packets read from one of the pipeline input ports and currently processed by + * the pipeline (this packet is not an element of the pkts array input parameter + * of the action handler). + * + * @param p + * Handle to pipeline instance + * @param port_id + * Output port ID (returned by previous invocation of pipeline output port + * create) to send the packet specified by pkt + * @param pkt + * New packet generated by the action handler + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_port_out_packet_insert(struct rte_pipeline *p, + uint32_t port_id, + struct rte_mbuf *pkt); + +#define rte_pipeline_ah_port_out_packet_insert \ + rte_pipeline_port_out_packet_insert + +/** + * Action handler packet hijack + * + * This function can be called by any input/output port or table action handler + * to hijack selected packets from the burst of packets read from one of the + * pipeline input ports and currently processed by the pipeline. The hijacked + * packets are removed from any further pipeline processing, with the action + * handler now having the full ownership for these packets. + * + * The action handler can further send the hijacked packets out through any + * pipeline output port by calling the rte_pipeline_ah_port_out_packet_insert() + * function. The action handler can also drop these packets by calling the + * rte_pktmbuf_free() function, although a better alternative is provided by + * the action handler using the rte_pipeline_ah_packet_drop() function. + * + * @param p + * Handle to pipeline instance + * @param pkts_mask + * 64-bit bitmask specifying which of the packets handed over for processing + * to the action handler is to be hijacked by the action handler. When + * pkts_mask bit n is set, then element n of the pkts array (input argument to + * the action handler) is hijacked. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_ah_packet_hijack(struct rte_pipeline *p, + uint64_t pkts_mask); + +/** + * Action handler packet drop + * + * This function is called by the pipeline action handlers (port in/out, table) + * to drop the packets selected using packet mask. + * + * This function can be called by any input/output port or table action handler + * to drop selected packets from the burst of packets read from one of the + * pipeline input ports and currently processed by the pipeline. The dropped + * packets are removed from any further pipeline processing and the packet + * buffers are eventually freed to their buffer pool. + * + * This function updates the drop statistics counters correctly, therefore the + * recommended approach for dropping packets by the action handlers is to call + * this function as opposed to the action handler hijacking the packets first + * and then dropping them invisibly to the pipeline (by using the + * rte_pktmbuf_free() function). + * + * @param p + * Handle to pipeline instance + * @param pkts_mask + * 64-bit bitmask specifying which of the packets handed over for processing + * to the action handler is to be dropped by the action handler. When + * pkts_mask bit n is set, then element n of the pkts array (input argument to + * the action handler) is dropped. + * @return + * 0 on success, error code otherwise + */ +int rte_pipeline_ah_packet_drop(struct rte_pipeline *p, + uint64_t pkts_mask); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_pipeline/rte_pipeline_version.map b/lib/librte_pipeline/rte_pipeline_version.map new file mode 100644 index 00000000..e4ee154f --- /dev/null +++ b/lib/librte_pipeline/rte_pipeline_version.map @@ -0,0 +1,47 @@ +DPDK_2.0 { + global: + + rte_pipeline_check; + rte_pipeline_create; + rte_pipeline_flush; + rte_pipeline_free; + rte_pipeline_port_in_connect_to_table; + rte_pipeline_port_in_create; + rte_pipeline_port_in_disable; + rte_pipeline_port_in_enable; + rte_pipeline_port_out_create; + rte_pipeline_port_out_packet_insert; + rte_pipeline_run; + rte_pipeline_table_create; + rte_pipeline_table_default_entry_add; + rte_pipeline_table_default_entry_delete; + rte_pipeline_table_entry_add; + rte_pipeline_table_entry_delete; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_pipeline_port_in_stats_read; + rte_pipeline_port_out_stats_read; + rte_pipeline_table_stats_read; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_pipeline_table_entry_add_bulk; + rte_pipeline_table_entry_delete_bulk; + +} DPDK_2.1; + +DPDK_16.04 { + global: + + rte_pipeline_ah_packet_hijack; + rte_pipeline_ah_packet_drop; + +} DPDK_2.2; diff --git a/lib/librte_port/Makefile b/lib/librte_port/Makefile new file mode 100644 index 00000000..2c0ccbe5 --- /dev/null +++ b/lib/librte_port/Makefile @@ -0,0 +1,79 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_port.a +ifeq ($(CONFIG_RTE_PORT_PCAP),y) +LDLIBS += -lpcap +endif + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_port_version.map + +LIBABIVER := 2 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ring.c +ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_frag.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_ras.c +endif +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_sched.c +SRCS-$(CONFIG_RTE_LIBRTE_PORT) += rte_port_source_sink.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ethdev.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ring.h +ifeq ($(CONFIG_RTE_LIBRTE_IP_FRAG),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_frag.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_ras.h +endif +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_sched.h +SYMLINK-$(CONFIG_RTE_LIBRTE_PORT)-include += rte_port_source_sink.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) := lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_mempool +DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_PORT) += lib/librte_ip_frag + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_port/rte_port.h b/lib/librte_port/rte_port.h new file mode 100644 index 00000000..c3c53487 --- /dev/null +++ b/lib/librte_port/rte_port.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_H__ +#define __INCLUDE_RTE_PORT_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port + * + * This tool is part of the DPDK Packet Framework tool suite and provides + * a standard interface to implement different types of packet ports. + * + ***/ + +#include +#include + +/**@{ + * Macros to allow accessing metadata stored in the mbuf headroom + * just beyond the end of the mbuf data structure returned by a port + */ +#define RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset) \ + (&((uint8_t *)(mbuf))[offset]) +#define RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset) \ + ((uint16_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset) \ + ((uint32_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset) \ + ((uint64_t *) RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) + +#define RTE_MBUF_METADATA_UINT8(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT8_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT16(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT16_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT32(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT32_PTR(mbuf, offset)) +#define RTE_MBUF_METADATA_UINT64(mbuf, offset) \ + (*RTE_MBUF_METADATA_UINT64_PTR(mbuf, offset)) +/**@}*/ + +/* + * Port IN + * + */ +/** Maximum number of packets read from any input port in a single burst. +Cannot be changed. */ +#define RTE_PORT_IN_BURST_SIZE_MAX 64 + +/** Input port statistics */ +struct rte_port_in_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_drop; +}; + +/** + * Input port create + * + * @param params + * Parameters for input port creation + * @param socket_id + * CPU socket ID (e.g. for memory allocation purpose) + * @return + * Handle to input port instance + */ +typedef void* (*rte_port_in_op_create)(void *params, int socket_id); + +/** + * Input port free + * + * @param port + * Handle to input port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_in_op_free)(void *port); + +/** + * Input port packet burst RX + * + * @param port + * Handle to input port instance + * @param pkts + * Burst of input packets + * @param n_pkts + * Number of packets in the input burst + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_in_op_rx)( + void *port, + struct rte_mbuf **pkts, + uint32_t n_pkts); + +/** + * Input port stats get + * + * @param port + * Handle to output port instance + * @param stats + * Handle to port_in stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. + */ +typedef int (*rte_port_in_op_stats_read)( + void *port, + struct rte_port_in_stats *stats, + int clear); + +/** Input port interface defining the input port operation */ +struct rte_port_in_ops { + rte_port_in_op_create f_create; /**< Create */ + rte_port_in_op_free f_free; /**< Free */ + rte_port_in_op_rx f_rx; /**< Packet RX (packet burst) */ + rte_port_in_op_stats_read f_stats; /**< Stats */ +}; + +/* + * Port OUT + * + */ +/** Output port statistics */ +struct rte_port_out_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_drop; +}; + +/** + * Output port create + * + * @param params + * Parameters for output port creation + * @param socket_id + * CPU socket ID (e.g. for memory allocation purpose) + * @return + * Handle to output port instance + */ +typedef void* (*rte_port_out_op_create)(void *params, int socket_id); + +/** + * Output port free + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_free)(void *port); + +/** + * Output port single packet TX + * + * @param port + * Handle to output port instance + * @param pkt + * Input packet + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx)( + void *port, + struct rte_mbuf *pkt); + +/** + * Output port packet burst TX + * + * @param port + * Handle to output port instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array will not be accessed. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_tx_bulk)( + void *port, + struct rte_mbuf **pkt, + uint64_t pkts_mask); + +/** + * Output port flush + * + * @param port + * Handle to output port instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_port_out_op_flush)(void *port); + +/** + * Output port stats read + * + * @param port + * Handle to output port instance + * @param stats + * Handle to port_out stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. + */ +typedef int (*rte_port_out_op_stats_read)( + void *port, + struct rte_port_out_stats *stats, + int clear); + +/** Output port interface defining the output port operation */ +struct rte_port_out_ops { + rte_port_out_op_create f_create; /**< Create */ + rte_port_out_op_free f_free; /**< Free */ + rte_port_out_op_tx f_tx; /**< Packet TX (single packet) */ + rte_port_out_op_tx_bulk f_tx_bulk; /**< Packet TX (packet burst) */ + rte_port_out_op_flush f_flush; /**< Flush */ + rte_port_out_op_stats_read f_stats; /**< Stats */ +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_ethdev.c b/lib/librte_port/rte_port_ethdev.c new file mode 100644 index 00000000..73e5f185 --- /dev/null +++ b/lib/librte_port/rte_port_ethdev.c @@ -0,0 +1,553 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include + +#include "rte_port_ethdev.h" + +/* + * Port ETHDEV Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_reader { + struct rte_port_in_stats stats; + + uint16_t queue_id; + uint8_t port_id; +}; + +static void * +rte_port_ethdev_reader_create(void *params, int socket_id) +{ + struct rte_port_ethdev_reader_params *conf = + (struct rte_port_ethdev_reader_params *) params; + struct rte_port_ethdev_reader *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: params is NULL\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + + return port; +} + +static int +rte_port_ethdev_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_ethdev_reader *p = + (struct rte_port_ethdev_reader *) port; + uint16_t rx_pkt_cnt; + + rx_pkt_cnt = rte_eth_rx_burst(p->port_id, p->queue_id, pkts, n_pkts); + RTE_PORT_ETHDEV_READER_STATS_PKTS_IN_ADD(p, rx_pkt_cnt); + return rx_pkt_cnt; +} + +static int +rte_port_ethdev_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ethdev_reader *p = + (struct rte_port_ethdev_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port ETHDEV Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t bsz_mask; + uint16_t queue_id; + uint8_t port_id; +}; + +static void * +rte_port_ethdev_writer_create(void *params, int socket_id) +{ + struct rte_port_ethdev_writer_params *conf = + (struct rte_port_ethdev_writer_params *) params; + struct rte_port_ethdev_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + return port; +} + +static inline void +send_burst(struct rte_port_ethdev_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id, + p->tx_buf, p->tx_buf_count); + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ethdev_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ethdev_writer *p = + (struct rte_port_ethdev_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_ethdev_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ethdev_writer *p = + (struct rte_port_ethdev_writer *) port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) + send_burst(p); + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts, + n_pkts); + + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); + for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + rte_pktmbuf_free(pkt); + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + + return 0; +} + +static int +rte_port_ethdev_writer_flush(void *port) +{ + struct rte_port_ethdev_writer *p = + (struct rte_port_ethdev_writer *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_ethdev_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_ethdev_writer_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ethdev_writer *p = + (struct rte_port_ethdev_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port ETHDEV Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ethdev_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t tx_burst_sz; + uint16_t tx_buf_count; + uint64_t bsz_mask; + uint64_t n_retries; + uint16_t queue_id; + uint8_t port_id; +}; + +static void * +rte_port_ethdev_writer_nodrop_create(void *params, int socket_id) +{ + struct rte_port_ethdev_writer_nodrop_params *conf = + (struct rte_port_ethdev_writer_nodrop_params *) params; + struct rte_port_ethdev_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid input parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->port_id = conf->port_id; + port->queue_id = conf->queue_id; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static inline void +send_burst_nodrop(struct rte_port_ethdev_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_eth_tx_burst(p->port_id, p->queue_id, p->tx_buf, + p->tx_buf_count); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_eth_tx_burst(p->port_id, p->queue_id, + p->tx_buf + nb_tx, p->tx_buf_count - nb_tx); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ethdev_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ethdev_writer_nodrop *p = + (struct rte_port_ethdev_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ethdev_writer_nodrop *p = + (struct rte_port_ethdev_writer_nodrop *) port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) + send_burst_nodrop(p); + + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + n_pkts_ok = rte_eth_tx_burst(p->port_id, p->queue_id, pkts, + n_pkts); + + if (n_pkts_ok >= n_pkts) + return 0; + + /* + * If we didnt manage to send all packets in single burst, move + * remaining packets to the buffer and call send burst. + */ + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + p->tx_buf[p->tx_buf_count++] = pkt; + } + send_burst_nodrop(p); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_ETHDEV_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + } + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_flush(void *port) +{ + struct rte_port_ethdev_writer_nodrop *p = + (struct rte_port_ethdev_writer_nodrop *) port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ethdev_writer_nodrop_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_ethdev_writer_nodrop_flush(port); + rte_free(port); + + return 0; +} + +static int rte_port_ethdev_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ethdev_writer_nodrop *p = + (struct rte_port_ethdev_writer_nodrop *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_ethdev_reader_ops = { + .f_create = rte_port_ethdev_reader_create, + .f_free = rte_port_ethdev_reader_free, + .f_rx = rte_port_ethdev_reader_rx, + .f_stats = rte_port_ethdev_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ethdev_writer_ops = { + .f_create = rte_port_ethdev_writer_create, + .f_free = rte_port_ethdev_writer_free, + .f_tx = rte_port_ethdev_writer_tx, + .f_tx_bulk = rte_port_ethdev_writer_tx_bulk, + .f_flush = rte_port_ethdev_writer_flush, + .f_stats = rte_port_ethdev_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops = { + .f_create = rte_port_ethdev_writer_nodrop_create, + .f_free = rte_port_ethdev_writer_nodrop_free, + .f_tx = rte_port_ethdev_writer_nodrop_tx, + .f_tx_bulk = rte_port_ethdev_writer_nodrop_tx_bulk, + .f_flush = rte_port_ethdev_writer_nodrop_flush, + .f_stats = rte_port_ethdev_writer_nodrop_stats_read, +}; diff --git a/lib/librte_port/rte_port_ethdev.h b/lib/librte_port/rte_port_ethdev.h new file mode 100644 index 00000000..201a79e4 --- /dev/null +++ b/lib/librte_port/rte_port_ethdev.h @@ -0,0 +1,105 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_ETHDEV_H__ +#define __INCLUDE_RTE_PORT_ETHDEV_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ethernet Device + * + * ethdev_reader: input port built on top of pre-initialized NIC RX queue + * ethdev_writer: output port built on top of pre-initialized NIC TX queue + * + ***/ + +#include + +#include "rte_port.h" + +/** ethdev_reader port parameters */ +struct rte_port_ethdev_reader_params { + /** NIC RX port ID */ + uint8_t port_id; + + /** NIC RX queue ID */ + uint16_t queue_id; +}; + +/** ethdev_reader port operations */ +extern struct rte_port_in_ops rte_port_ethdev_reader_ops; + +/** ethdev_writer port parameters */ +struct rte_port_ethdev_writer_params { + /** NIC RX port ID */ + uint8_t port_id; + + /** NIC RX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** ethdev_writer port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_ops; + +/** ethdev_writer_nodrop port parameters */ +struct rte_port_ethdev_writer_nodrop_params { + /** NIC RX port ID */ + uint8_t port_id; + + /** NIC RX queue ID */ + uint16_t queue_id; + + /** Recommended burst size to NIC TX queue. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ethdev_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ethdev_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_frag.c b/lib/librte_port/rte_port_frag.c new file mode 100644 index 00000000..0fcace99 --- /dev/null +++ b/lib/librte_port/rte_port_frag.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include + +#include +#include +#include + +#include "rte_port_frag.h" + +/* Max number of fragments per packet allowed */ +#define RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET 0x80 + +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(port, val) + +#endif + +typedef int32_t + (*frag_op)(struct rte_mbuf *pkt_in, + struct rte_mbuf **pkts_out, + uint16_t nb_pkts_out, + uint16_t mtu_size, + struct rte_mempool *pool_direct, + struct rte_mempool *pool_indirect); + +struct rte_port_ring_reader_frag { + struct rte_port_in_stats stats; + + /* Input parameters */ + struct rte_ring *ring; + uint32_t mtu; + uint32_t metadata_size; + struct rte_mempool *pool_direct; + struct rte_mempool *pool_indirect; + + /* Internal buffers */ + struct rte_mbuf *pkts[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_mbuf *frags[RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET]; + uint32_t n_pkts; + uint32_t pos_pkts; + uint32_t n_frags; + uint32_t pos_frags; + + frag_op f_frag; +} __rte_cache_aligned; + +static void * +rte_port_ring_reader_frag_create(void *params, int socket_id, int is_ipv4) +{ + struct rte_port_ring_reader_frag_params *conf = + (struct rte_port_ring_reader_frag_params *) params; + struct rte_port_ring_reader_frag *port; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__); + return NULL; + } + if (conf->ring == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__); + return NULL; + } + if (conf->mtu == 0) { + RTE_LOG(ERR, PORT, "%s: Parameter mtu is invalid\n", __func__); + return NULL; + } + if (conf->pool_direct == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter pool_direct is NULL\n", + __func__); + return NULL; + } + if (conf->pool_indirect == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter pool_indirect is NULL\n", + __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), RTE_CACHE_LINE_SIZE, + socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->mtu = conf->mtu; + port->metadata_size = conf->metadata_size; + port->pool_direct = conf->pool_direct; + port->pool_indirect = conf->pool_indirect; + + port->n_pkts = 0; + port->pos_pkts = 0; + port->n_frags = 0; + port->pos_frags = 0; + + port->f_frag = (is_ipv4) ? + rte_ipv4_fragment_packet : rte_ipv6_fragment_packet; + + return port; +} + +static void * +rte_port_ring_reader_ipv4_frag_create(void *params, int socket_id) +{ + return rte_port_ring_reader_frag_create(params, socket_id, 1); +} + +static void * +rte_port_ring_reader_ipv6_frag_create(void *params, int socket_id) +{ + return rte_port_ring_reader_frag_create(params, socket_id, 0); +} + +static int +rte_port_ring_reader_frag_rx(void *port, + struct rte_mbuf **pkts, + uint32_t n_pkts) +{ + struct rte_port_ring_reader_frag *p = + (struct rte_port_ring_reader_frag *) port; + uint32_t n_pkts_out; + + n_pkts_out = 0; + + /* Get packets from the "frag" buffer */ + if (p->n_frags >= n_pkts) { + memcpy(pkts, &p->frags[p->pos_frags], n_pkts * sizeof(void *)); + p->pos_frags += n_pkts; + p->n_frags -= n_pkts; + + return n_pkts; + } + + memcpy(pkts, &p->frags[p->pos_frags], p->n_frags * sizeof(void *)); + n_pkts_out = p->n_frags; + p->n_frags = 0; + + /* Look to "pkts" buffer to get more packets */ + for ( ; ; ) { + struct rte_mbuf *pkt; + uint32_t n_pkts_to_provide, i; + int status; + + /* If "pkts" buffer is empty, read packet burst from ring */ + if (p->n_pkts == 0) { + p->n_pkts = rte_ring_sc_dequeue_burst(p->ring, + (void **) p->pkts, RTE_PORT_IN_BURST_SIZE_MAX); + RTE_PORT_RING_READER_FRAG_STATS_PKTS_IN_ADD(p, p->n_pkts); + if (p->n_pkts == 0) + return n_pkts_out; + p->pos_pkts = 0; + } + + /* Read next packet from "pkts" buffer */ + pkt = p->pkts[p->pos_pkts++]; + p->n_pkts--; + + /* If not jumbo, pass current packet to output */ + if (pkt->pkt_len <= p->mtu) { + pkts[n_pkts_out++] = pkt; + + n_pkts_to_provide = n_pkts - n_pkts_out; + if (n_pkts_to_provide == 0) + return n_pkts; + + continue; + } + + /* Fragment current packet into the "frags" buffer */ + status = p->f_frag( + pkt, + p->frags, + RTE_PORT_FRAG_MAX_FRAGS_PER_PACKET, + p->mtu, + p->pool_direct, + p->pool_indirect + ); + + if (status < 0) { + rte_pktmbuf_free(pkt); + RTE_PORT_RING_READER_FRAG_STATS_PKTS_DROP_ADD(p, 1); + continue; + } + + p->n_frags = (uint32_t) status; + p->pos_frags = 0; + + /* Copy meta-data from input jumbo packet to its fragments */ + for (i = 0; i < p->n_frags; i++) { + uint8_t *src = + RTE_MBUF_METADATA_UINT8_PTR(pkt, sizeof(struct rte_mbuf)); + uint8_t *dst = + RTE_MBUF_METADATA_UINT8_PTR(p->frags[i], sizeof(struct rte_mbuf)); + + memcpy(dst, src, p->metadata_size); + } + + /* Free input jumbo packet */ + rte_pktmbuf_free(pkt); + + /* Get packets from "frag" buffer */ + n_pkts_to_provide = n_pkts - n_pkts_out; + if (p->n_frags >= n_pkts_to_provide) { + memcpy(&pkts[n_pkts_out], p->frags, + n_pkts_to_provide * sizeof(void *)); + p->n_frags -= n_pkts_to_provide; + p->pos_frags += n_pkts_to_provide; + + return n_pkts; + } + + memcpy(&pkts[n_pkts_out], p->frags, + p->n_frags * sizeof(void *)); + n_pkts_out += p->n_frags; + p->n_frags = 0; + } +} + +static int +rte_port_ring_reader_frag_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); + return -1; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_frag_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ring_reader_frag *p = + (struct rte_port_ring_reader_frag *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops = { + .f_create = rte_port_ring_reader_ipv4_frag_create, + .f_free = rte_port_ring_reader_frag_free, + .f_rx = rte_port_ring_reader_frag_rx, + .f_stats = rte_port_frag_reader_stats_read, +}; + +struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops = { + .f_create = rte_port_ring_reader_ipv6_frag_create, + .f_free = rte_port_ring_reader_frag_free, + .f_rx = rte_port_ring_reader_frag_rx, + .f_stats = rte_port_frag_reader_stats_read, +}; diff --git a/lib/librte_port/rte_port_frag.h b/lib/librte_port/rte_port_frag.h new file mode 100644 index 00000000..0085ff7c --- /dev/null +++ b/lib/librte_port/rte_port_frag.h @@ -0,0 +1,101 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_IP_FRAG_H__ +#define __INCLUDE_RTE_PORT_IP_FRAG_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Fragmentation + * + * This port is built on top of pre-initialized single consumer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP fragmentation functionality is executed on ring read operation, + * hence this port is implemented as an input port. A regular ring_writer port + * can be created to write to the same ring. + * + * The packets written to the ring are either complete IP datagrams or jumbo + * frames (i.e. IP packets with length bigger than provided MTU value). The + * packets read from the ring are all non-jumbo frames. The complete IP + * datagrams written to the ring are not changed. The jumbo frames are + * fragmented into several IP packets with length less or equal to MTU. + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_reader_ipv4_frag port parameters */ +struct rte_port_ring_reader_frag_params { + /** Underlying single consumer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Maximum Transfer Unit (MTU). Maximum IP packet size (in bytes). */ + uint32_t mtu; + + /** Size of application dependent meta-data stored per each input packet + that has to be copied to each of the fragments originating from the + same input IP datagram. */ + uint32_t metadata_size; + + /** Pre-initialized buffer pool used for allocating direct buffers for + the output fragments. */ + struct rte_mempool *pool_direct; + + /** Pre-initialized buffer pool used for allocating indirect buffers for + the output fragments. */ + struct rte_mempool *pool_indirect; +}; + +#define rte_port_ring_reader_ipv4_frag_params rte_port_ring_reader_frag_params + +#define rte_port_ring_reader_ipv6_frag_params rte_port_ring_reader_frag_params + +/** ring_reader_ipv4_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv4_frag_ops; + +/** ring_reader_ipv6_frag port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ipv6_frag_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_ras.c b/lib/librte_port/rte_port_ras.c new file mode 100644 index 00000000..c4bb5081 --- /dev/null +++ b/lib/librte_port/rte_port_ras.c @@ -0,0 +1,359 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include + +#include +#include +#include +#include + +#include "rte_port_ras.h" + +#ifndef RTE_PORT_RAS_N_BUCKETS +#define RTE_PORT_RAS_N_BUCKETS 4094 +#endif + +#ifndef RTE_PORT_RAS_N_ENTRIES_PER_BUCKET +#define RTE_PORT_RAS_N_ENTRIES_PER_BUCKET 8 +#endif + +#ifndef RTE_PORT_RAS_N_ENTRIES +#define RTE_PORT_RAS_N_ENTRIES (RTE_PORT_RAS_N_BUCKETS * RTE_PORT_RAS_N_ENTRIES_PER_BUCKET) +#endif + +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer_ras; + +typedef void (*ras_op)( + struct rte_port_ring_writer_ras *p, + struct rte_mbuf *pkt); + +static void +process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt); +static void +process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt); + +struct rte_port_ring_writer_ras { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + struct rte_ip_frag_tbl *frag_tbl; + struct rte_ip_frag_death_row death_row; + + ras_op f_ras; +}; + +static void * +rte_port_ring_writer_ras_create(void *params, int socket_id, int is_ipv4) +{ + struct rte_port_ring_writer_ras_params *conf = + (struct rte_port_ring_writer_ras_params *) params; + struct rte_port_ring_writer_ras *port; + uint64_t frag_cycles; + + /* Check input parameters */ + if (conf == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter conf is NULL\n", __func__); + return NULL; + } + if (conf->ring == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter ring is NULL\n", __func__); + return NULL; + } + if ((conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Parameter tx_burst_sz is invalid\n", + __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate socket\n", __func__); + return NULL; + } + + /* Create fragmentation table */ + frag_cycles = (rte_get_tsc_hz() + MS_PER_S - 1) / MS_PER_S * MS_PER_S; + frag_cycles *= 100; + + port->frag_tbl = rte_ip_frag_table_create( + RTE_PORT_RAS_N_BUCKETS, + RTE_PORT_RAS_N_ENTRIES_PER_BUCKET, + RTE_PORT_RAS_N_ENTRIES, + frag_cycles, + socket_id); + + if (port->frag_tbl == NULL) { + RTE_LOG(ERR, PORT, "%s: rte_ip_frag_table_create failed\n", + __func__); + rte_free(port); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + + port->f_ras = (is_ipv4 == 1) ? process_ipv4 : process_ipv6; + + return port; +} + +static void * +rte_port_ring_writer_ipv4_ras_create(void *params, int socket_id) +{ + return rte_port_ring_writer_ras_create(params, socket_id, 1); +} + +static void * +rte_port_ring_writer_ipv6_ras_create(void *params, int socket_id) +{ + return rte_port_ring_writer_ras_create(params, socket_id, 0); +} + +static inline void +send_burst(struct rte_port_ring_writer_ras *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static void +process_ipv4(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt) +{ + /* Assume there is no ethernet header */ + struct ipv4_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv4_hdr *); + + /* Get "More fragments" flag and fragment offset */ + uint16_t frag_field = rte_be_to_cpu_16(pkt_hdr->fragment_offset); + uint16_t frag_offset = (uint16_t)(frag_field & IPV4_HDR_OFFSET_MASK); + uint16_t frag_flag = (uint16_t)(frag_field & IPV4_HDR_MF_FLAG); + + /* If it is a fragmented packet, then try to reassemble */ + if ((frag_flag == 0) && (frag_offset == 0)) + p->tx_buf[p->tx_buf_count++] = pkt; + else { + struct rte_mbuf *mo; + struct rte_ip_frag_tbl *tbl = p->frag_tbl; + struct rte_ip_frag_death_row *dr = &p->death_row; + + pkt->l3_len = sizeof(*pkt_hdr); + + /* Process this fragment */ + mo = rte_ipv4_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), + pkt_hdr); + if (mo != NULL) + p->tx_buf[p->tx_buf_count++] = mo; + + rte_ip_frag_free_death_row(&p->death_row, 3); + } +} + +static void +process_ipv6(struct rte_port_ring_writer_ras *p, struct rte_mbuf *pkt) +{ + /* Assume there is no ethernet header */ + struct ipv6_hdr *pkt_hdr = rte_pktmbuf_mtod(pkt, struct ipv6_hdr *); + + struct ipv6_extension_fragment *frag_hdr; + uint16_t frag_data = 0; + frag_hdr = rte_ipv6_frag_get_ipv6_fragment_header(pkt_hdr); + if (frag_hdr != NULL) + frag_data = rte_be_to_cpu_16(frag_hdr->frag_data); + + /* If it is a fragmented packet, then try to reassemble */ + if ((frag_data & RTE_IPV6_FRAG_USED_MASK) == 0) + p->tx_buf[p->tx_buf_count++] = pkt; + else { + struct rte_mbuf *mo; + struct rte_ip_frag_tbl *tbl = p->frag_tbl; + struct rte_ip_frag_death_row *dr = &p->death_row; + + pkt->l3_len = sizeof(*pkt_hdr) + sizeof(*frag_hdr); + + /* Process this fragment */ + mo = rte_ipv6_frag_reassemble_packet(tbl, dr, pkt, rte_rdtsc(), pkt_hdr, + frag_hdr); + if (mo != NULL) + p->tx_buf[p->tx_buf_count++] = mo; + + rte_ip_frag_free_death_row(&p->death_row, 3); + } +} + +static int +rte_port_ring_writer_ras_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_ras *p = + (struct rte_port_ring_writer_ras *) port; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_writer_ras_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_ring_writer_ras *p = + (struct rte_port_ring_writer_ras *) port; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + RTE_PORT_RING_WRITER_RAS_STATS_PKTS_IN_ADD(p, 1); + p->f_ras(p, pkt); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + pkts_mask &= ~pkt_mask; + } + } + + return 0; +} + +static int +rte_port_ring_writer_ras_flush(void *port) +{ + struct rte_port_ring_writer_ras *p = + (struct rte_port_ring_writer_ras *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_writer_ras_free(void *port) +{ + struct rte_port_ring_writer_ras *p = + (struct rte_port_ring_writer_ras *) port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Parameter port is NULL\n", __func__); + return -1; + } + + rte_port_ring_writer_ras_flush(port); + rte_ip_frag_table_destroy(p->frag_tbl); + rte_free(port); + + return 0; +} + +static int +rte_port_ras_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer_ras *p = + (struct rte_port_ring_writer_ras *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops = { + .f_create = rte_port_ring_writer_ipv4_ras_create, + .f_free = rte_port_ring_writer_ras_free, + .f_tx = rte_port_ring_writer_ras_tx, + .f_tx_bulk = rte_port_ring_writer_ras_tx_bulk, + .f_flush = rte_port_ring_writer_ras_flush, + .f_stats = rte_port_ras_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops = { + .f_create = rte_port_ring_writer_ipv6_ras_create, + .f_free = rte_port_ring_writer_ras_free, + .f_tx = rte_port_ring_writer_ras_tx, + .f_tx_bulk = rte_port_ring_writer_ras_tx_bulk, + .f_flush = rte_port_ring_writer_ras_flush, + .f_stats = rte_port_ras_writer_stats_read, +}; diff --git a/lib/librte_port/rte_port_ras.h b/lib/librte_port/rte_port_ras.h new file mode 100644 index 00000000..5a16f831 --- /dev/null +++ b/lib/librte_port/rte_port_ras.h @@ -0,0 +1,90 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RAS_H__ +#define __INCLUDE_RTE_PORT_RAS_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port for IPv4 Reassembly + * + * This port is built on top of pre-initialized single producer rte_ring. In + * order to minimize the amount of packets stored in the ring at any given + * time, the IP reassembly functionality is executed on ring write operation, + * hence this port is implemented as an output port. A regular ring_reader port + * can be created to read from the same ring. + * + * The packets written to the ring are either complete IP datagrams or IP + * fragments. The packets read from the ring are all complete IP datagrams, + * either jumbo frames (i.e. IP packets with length bigger than MTU) or not. + * The complete IP datagrams written to the ring are not changed. The IP + * fragments written to the ring are first reassembled and into complete IP + * datagrams or dropped on error or IP reassembly time-out. + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_writer_ipv4_ras port parameters */ +struct rte_port_ring_writer_ras_params { + /** Underlying single consumer ring that has to be pre-initialized. */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be bigger + or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +#define rte_port_ring_writer_ipv4_ras_params rte_port_ring_writer_ras_params + +#define rte_port_ring_writer_ipv6_ras_params rte_port_ring_writer_ras_params + +/** ring_writer_ipv4_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv4_ras_ops; + +/** ring_writer_ipv6_ras port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ipv6_ras_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_ring.c b/lib/librte_port/rte_port_ring.c new file mode 100644 index 00000000..3b9d3d08 --- /dev/null +++ b/lib/librte_port/rte_port_ring.c @@ -0,0 +1,810 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include + +#include "rte_port_ring.h" + +/* + * Port RING Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_READER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_reader { + struct rte_port_in_stats stats; + + struct rte_ring *ring; +}; + +static void * +rte_port_ring_reader_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_reader_params *conf = + (struct rte_port_ring_reader_params *) params; + struct rte_port_ring_reader *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->cons.sc_dequeue && is_multi) || + (!(conf->ring->cons.sc_dequeue) && !is_multi)) { + RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + + return port; +} + +static void * +rte_port_ring_reader_create(void *params, int socket_id) +{ + return rte_port_ring_reader_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_reader_create(void *params, int socket_id) +{ + return rte_port_ring_reader_create_internal(params, socket_id, 1); +} + +static int +rte_port_ring_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port; + uint32_t nb_rx; + + nb_rx = rte_ring_sc_dequeue_burst(p->ring, (void **) pkts, n_pkts); + RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} + +static int +rte_port_ring_multi_reader_rx(void *port, struct rte_mbuf **pkts, + uint32_t n_pkts) +{ + struct rte_port_ring_reader *p = (struct rte_port_ring_reader *) port; + uint32_t nb_rx; + + nb_rx = rte_ring_mc_dequeue_burst(p->ring, (void **) pkts, n_pkts); + RTE_PORT_RING_READER_STATS_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} + +static int +rte_port_ring_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_ring_reader *p = + (struct rte_port_ring_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port RING Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint32_t is_multi; +}; + +static void * +rte_port_ring_writer_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_writer_params *conf = + (struct rte_port_ring_writer_params *) params; + struct rte_port_ring_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->prod.sp_enqueue && is_multi) || + (!(conf->ring->prod.sp_enqueue) && !is_multi) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + port->is_multi = is_multi; + + return port; +} + +static void * +rte_port_ring_writer_create(void *params, int socket_id) +{ + return rte_port_ring_writer_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_writer_create(void *params, int socket_id) +{ + return rte_port_ring_writer_create_internal(params, socket_id, 1); +} + +static inline void +send_burst(struct rte_port_ring_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static inline void +send_burst_mp(struct rte_port_ring_writer *p) +{ + uint32_t nb_tx; + + nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ring_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_mp(p); + + return 0; +} + +static inline int __attribute__((always_inline)) +rte_port_ring_writer_tx_bulk_internal(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint32_t is_multi) +{ + struct rte_port_ring_writer *p = + (struct rte_port_ring_writer *) port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) { + if (is_multi) + send_burst_mp(p); + else + send_burst(p); + } + + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, n_pkts); + if (is_multi) + n_pkts_ok = rte_ring_mp_enqueue_burst(p->ring, (void **)pkts, + n_pkts); + else + n_pkts_ok = rte_ring_sp_enqueue_burst(p->ring, (void **)pkts, + n_pkts); + + RTE_PORT_RING_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - n_pkts_ok); + for ( ; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + rte_pktmbuf_free(pkt); + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) { + if (is_multi) + send_burst_mp(p); + else + send_burst(p); + } + } + + return 0; +} + +static int +rte_port_ring_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 0); +} + +static int +rte_port_ring_multi_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return rte_port_ring_writer_tx_bulk_internal(port, pkts, pkts_mask, 1); +} + +static int +rte_port_ring_writer_flush(void *port) +{ + struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + + if (p->tx_buf_count > 0) + send_burst(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_flush(void *port) +{ + struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + + if (p->tx_buf_count > 0) + send_burst_mp(p); + + return 0; +} + +static int +rte_port_ring_writer_free(void *port) +{ + struct rte_port_ring_writer *p = (struct rte_port_ring_writer *) port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + if (p->is_multi) + rte_port_ring_multi_writer_flush(port); + else + rte_port_ring_writer_flush(port); + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer *p = + (struct rte_port_ring_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port RING Writer Nodrop + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_ring_writer_nodrop { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_ring *ring; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; + uint64_t n_retries; + uint32_t is_multi; +}; + +static void * +rte_port_ring_writer_nodrop_create_internal(void *params, int socket_id, + uint32_t is_multi) +{ + struct rte_port_ring_writer_nodrop_params *conf = + (struct rte_port_ring_writer_nodrop_params *) params; + struct rte_port_ring_writer_nodrop *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->ring == NULL) || + (conf->ring->prod.sp_enqueue && is_multi) || + (!(conf->ring->prod.sp_enqueue) && !is_multi) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX)) { + RTE_LOG(ERR, PORT, "%s: Invalid Parameters\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->ring = conf->ring; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + port->is_multi = is_multi; + + /* + * When n_retries is 0 it means that we should wait for every packet to + * send no matter how many retries should it take. To limit number of + * branches in fast path, we use UINT64_MAX instead of branching. + */ + port->n_retries = (conf->n_retries == 0) ? UINT64_MAX : conf->n_retries; + + return port; +} + +static void * +rte_port_ring_writer_nodrop_create(void *params, int socket_id) +{ + return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 0); +} + +static void * +rte_port_ring_multi_writer_nodrop_create(void *params, int socket_id) +{ + return rte_port_ring_writer_nodrop_create_internal(params, socket_id, 1); +} + +static inline void +send_burst_nodrop(struct rte_port_ring_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_ring_sp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_ring_sp_enqueue_burst(p->ring, + (void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static inline void +send_burst_mp_nodrop(struct rte_port_ring_writer_nodrop *p) +{ + uint32_t nb_tx = 0, i; + + nb_tx = rte_ring_mp_enqueue_burst(p->ring, (void **)p->tx_buf, + p->tx_buf_count); + + /* We sent all the packets in a first try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + + for (i = 0; i < p->n_retries; i++) { + nb_tx += rte_ring_mp_enqueue_burst(p->ring, + (void **) (p->tx_buf + nb_tx), p->tx_buf_count - nb_tx); + + /* We sent all the packets in more than one try */ + if (nb_tx >= p->tx_buf_count) { + p->tx_buf_count = 0; + return; + } + } + + /* We didn't send the packets in maximum allowed attempts */ + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + for ( ; nb_tx < p->tx_buf_count; nb_tx++) + rte_pktmbuf_free(p->tx_buf[nb_tx]); + + p->tx_buf_count = 0; +} + +static int +rte_port_ring_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_nodrop_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) + send_burst_mp_nodrop(p); + + return 0; +} + +static inline int __attribute__((always_inline)) +rte_port_ring_writer_nodrop_tx_bulk_internal(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint32_t is_multi) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t n_pkts_ok; + + if (tx_buf_count) { + if (is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } + + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, n_pkts); + if (is_multi) + n_pkts_ok = + rte_ring_mp_enqueue_burst(p->ring, (void **)pkts, n_pkts); + else + n_pkts_ok = + rte_ring_sp_enqueue_burst(p->ring, (void **)pkts, n_pkts); + + if (n_pkts_ok >= n_pkts) + return 0; + + /* + * If we didn't manage to send all packets in single burst, move + * remaining packets to the buffer and call send burst. + */ + for (; n_pkts_ok < n_pkts; n_pkts_ok++) { + struct rte_mbuf *pkt = pkts[n_pkts_ok]; + + p->tx_buf[p->tx_buf_count++] = pkt; + } + if (is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_RING_WRITER_NODROP_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + + p->tx_buf_count = tx_buf_count; + if (tx_buf_count >= p->tx_burst_sz) { + if (is_multi) + send_burst_mp_nodrop(p); + else + send_burst_nodrop(p); + } + } + + return 0; +} + +static int +rte_port_ring_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return + rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 0); +} + +static int +rte_port_ring_multi_writer_nodrop_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + return + rte_port_ring_writer_nodrop_tx_bulk_internal(port, pkts, pkts_mask, 1); +} + +static int +rte_port_ring_writer_nodrop_flush(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + if (p->tx_buf_count > 0) + send_burst_nodrop(p); + + return 0; +} + +static int +rte_port_ring_multi_writer_nodrop_flush(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + if (p->tx_buf_count > 0) + send_burst_mp_nodrop(p); + + return 0; +} + +static int +rte_port_ring_writer_nodrop_free(void *port) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Port is NULL\n", __func__); + return -EINVAL; + } + + if (p->is_multi) + rte_port_ring_multi_writer_nodrop_flush(port); + else + rte_port_ring_writer_nodrop_flush(port); + + rte_free(port); + + return 0; +} + +static int +rte_port_ring_writer_nodrop_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_ring_writer_nodrop *p = + (struct rte_port_ring_writer_nodrop *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_ring_reader_ops = { + .f_create = rte_port_ring_reader_create, + .f_free = rte_port_ring_reader_free, + .f_rx = rte_port_ring_reader_rx, + .f_stats = rte_port_ring_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_ops = { + .f_create = rte_port_ring_writer_create, + .f_free = rte_port_ring_writer_free, + .f_tx = rte_port_ring_writer_tx, + .f_tx_bulk = rte_port_ring_writer_tx_bulk, + .f_flush = rte_port_ring_writer_flush, + .f_stats = rte_port_ring_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_writer_nodrop_ops = { + .f_create = rte_port_ring_writer_nodrop_create, + .f_free = rte_port_ring_writer_nodrop_free, + .f_tx = rte_port_ring_writer_nodrop_tx, + .f_tx_bulk = rte_port_ring_writer_nodrop_tx_bulk, + .f_flush = rte_port_ring_writer_nodrop_flush, + .f_stats = rte_port_ring_writer_nodrop_stats_read, +}; + +struct rte_port_in_ops rte_port_ring_multi_reader_ops = { + .f_create = rte_port_ring_multi_reader_create, + .f_free = rte_port_ring_reader_free, + .f_rx = rte_port_ring_multi_reader_rx, + .f_stats = rte_port_ring_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_multi_writer_ops = { + .f_create = rte_port_ring_multi_writer_create, + .f_free = rte_port_ring_writer_free, + .f_tx = rte_port_ring_multi_writer_tx, + .f_tx_bulk = rte_port_ring_multi_writer_tx_bulk, + .f_flush = rte_port_ring_multi_writer_flush, + .f_stats = rte_port_ring_writer_stats_read, +}; + +struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops = { + .f_create = rte_port_ring_multi_writer_nodrop_create, + .f_free = rte_port_ring_writer_nodrop_free, + .f_tx = rte_port_ring_multi_writer_nodrop_tx, + .f_tx_bulk = rte_port_ring_multi_writer_nodrop_tx_bulk, + .f_flush = rte_port_ring_multi_writer_nodrop_flush, + .f_stats = rte_port_ring_writer_nodrop_stats_read, +}; diff --git a/lib/librte_port/rte_port_ring.h b/lib/librte_port/rte_port_ring.h new file mode 100644 index 00000000..de377d28 --- /dev/null +++ b/lib/librte_port/rte_port_ring.h @@ -0,0 +1,123 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_RING_H__ +#define __INCLUDE_RTE_PORT_RING_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Ring + * + * ring_reader: + * input port built on top of pre-initialized single consumer ring + * ring_writer: + * output port built on top of pre-initialized single producer ring + * ring_multi_reader: + * input port built on top of pre-initialized multi consumers ring + * ring_multi_writer: + * output port built on top of pre-initialized multi producers ring + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** ring_reader port parameters */ +struct rte_port_ring_reader_params { + /** Underlying consumer ring that has to be pre-initialized */ + struct rte_ring *ring; +}; + +/** ring_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_reader_ops; + +/** ring_writer port parameters */ +struct rte_port_ring_writer_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** ring_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_ops; + +/** ring_writer_nodrop port parameters */ +struct rte_port_ring_writer_nodrop_params { + /** Underlying producer ring that has to be pre-initialized */ + struct rte_ring *ring; + + /** Recommended burst size to ring. The actual burst size can be + bigger or smaller than this value. */ + uint32_t tx_burst_sz; + + /** Maximum number of retries, 0 for no limit */ + uint32_t n_retries; +}; + +/** ring_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_writer_nodrop_ops; + +/** ring_multi_reader port parameters */ +#define rte_port_ring_multi_reader_params rte_port_ring_reader_params + +/** ring_multi_reader port operations */ +extern struct rte_port_in_ops rte_port_ring_multi_reader_ops; + +/** ring_multi_writer port parameters */ +#define rte_port_ring_multi_writer_params rte_port_ring_writer_params + +/** ring_multi_writer port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_ops; + +/** ring_multi_writer_nodrop port parameters */ +#define rte_port_ring_multi_writer_nodrop_params \ + rte_port_ring_writer_nodrop_params + +/** ring_multi_writer_nodrop port operations */ +extern struct rte_port_out_ops rte_port_ring_multi_writer_nodrop_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_sched.c b/lib/librte_port/rte_port_sched.c new file mode 100644 index 00000000..25217d62 --- /dev/null +++ b/lib/librte_port/rte_port_sched.c @@ -0,0 +1,323 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include + +#include +#include + +#include "rte_port_sched.h" + +/* + * Reader + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SCHED_READER_PKTS_IN_ADD(port, val) +#define RTE_PORT_SCHED_READER_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sched_reader { + struct rte_port_in_stats stats; + + struct rte_sched_port *sched; +}; + +static void * +rte_port_sched_reader_create(void *params, int socket_id) +{ + struct rte_port_sched_reader_params *conf = + (struct rte_port_sched_reader_params *) params; + struct rte_port_sched_reader *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->sched == NULL)) { + RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->sched = conf->sched; + + return port; +} + +static int +rte_port_sched_reader_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_sched_reader *p = (struct rte_port_sched_reader *) port; + uint32_t nb_rx; + + nb_rx = rte_sched_port_dequeue(p->sched, pkts, n_pkts); + RTE_PORT_SCHED_READER_PKTS_IN_ADD(p, nb_rx); + + return nb_rx; +} + +static int +rte_port_sched_reader_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_free(port); + + return 0; +} + +static int +rte_port_sched_reader_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_sched_reader *p = + (struct rte_port_sched_reader *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Writer + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sched_writer { + struct rte_port_out_stats stats; + + struct rte_mbuf *tx_buf[2 * RTE_PORT_IN_BURST_SIZE_MAX]; + struct rte_sched_port *sched; + uint32_t tx_burst_sz; + uint32_t tx_buf_count; + uint64_t bsz_mask; +}; + +static void * +rte_port_sched_writer_create(void *params, int socket_id) +{ + struct rte_port_sched_writer_params *conf = + (struct rte_port_sched_writer_params *) params; + struct rte_port_sched_writer *port; + + /* Check input parameters */ + if ((conf == NULL) || + (conf->sched == NULL) || + (conf->tx_burst_sz == 0) || + (conf->tx_burst_sz > RTE_PORT_IN_BURST_SIZE_MAX) || + (!rte_is_power_of_2(conf->tx_burst_sz))) { + RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->sched = conf->sched; + port->tx_burst_sz = conf->tx_burst_sz; + port->tx_buf_count = 0; + port->bsz_mask = 1LLU << (conf->tx_burst_sz - 1); + + return port; +} + +static int +rte_port_sched_writer_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + + p->tx_buf[p->tx_buf_count++] = pkt; + RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1); + if (p->tx_buf_count >= p->tx_burst_sz) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + return 0; +} + +static int +rte_port_sched_writer_tx_bulk(void *port, + struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + uint64_t bsz_mask = p->bsz_mask; + uint32_t tx_buf_count = p->tx_buf_count; + uint64_t expr = (pkts_mask & (pkts_mask + 1)) | + ((pkts_mask & bsz_mask) ^ bsz_mask); + + if (expr == 0) { + __rte_unused uint32_t nb_tx; + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + + if (tx_buf_count) { + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, + tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + nb_tx = rte_sched_port_enqueue(p->sched, pkts, n_pkts); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, n_pkts - nb_tx); + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + p->tx_buf[tx_buf_count++] = pkt; + RTE_PORT_SCHED_WRITER_STATS_PKTS_IN_ADD(p, 1); + pkts_mask &= ~pkt_mask; + } + p->tx_buf_count = tx_buf_count; + + if (tx_buf_count >= p->tx_burst_sz) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, + tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + } + + return 0; +} + +static int +rte_port_sched_writer_flush(void *port) +{ + struct rte_port_sched_writer *p = (struct rte_port_sched_writer *) port; + + if (p->tx_buf_count) { + __rte_unused uint32_t nb_tx; + + nb_tx = rte_sched_port_enqueue(p->sched, p->tx_buf, p->tx_buf_count); + RTE_PORT_SCHED_WRITER_STATS_PKTS_DROP_ADD(p, p->tx_buf_count - nb_tx); + p->tx_buf_count = 0; + } + + return 0; +} + +static int +rte_port_sched_writer_free(void *port) +{ + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: port is NULL\n", __func__); + return -EINVAL; + } + + rte_port_sched_writer_flush(port); + rte_free(port); + + return 0; +} + +static int +rte_port_sched_writer_stats_read(void *port, + struct rte_port_out_stats *stats, int clear) +{ + struct rte_port_sched_writer *p = + (struct rte_port_sched_writer *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_sched_reader_ops = { + .f_create = rte_port_sched_reader_create, + .f_free = rte_port_sched_reader_free, + .f_rx = rte_port_sched_reader_rx, + .f_stats = rte_port_sched_reader_stats_read, +}; + +struct rte_port_out_ops rte_port_sched_writer_ops = { + .f_create = rte_port_sched_writer_create, + .f_free = rte_port_sched_writer_free, + .f_tx = rte_port_sched_writer_tx, + .f_tx_bulk = rte_port_sched_writer_tx_bulk, + .f_flush = rte_port_sched_writer_flush, + .f_stats = rte_port_sched_writer_stats_read, +}; diff --git a/lib/librte_port/rte_port_sched.h b/lib/librte_port/rte_port_sched.h new file mode 100644 index 00000000..555415ab --- /dev/null +++ b/lib/librte_port/rte_port_sched.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_SCHED_H__ +#define __INCLUDE_RTE_PORT_SCHED_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Hierarchical Scheduler + * + * sched_reader: input port built on top of pre-initialized rte_sched_port + * sched_writer: output port built on top of pre-initialized rte_sched_port + * + ***/ + +#include + +#include + +#include "rte_port.h" + +/** sched_reader port parameters */ +struct rte_port_sched_reader_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; +}; + +/** sched_reader port operations */ +extern struct rte_port_in_ops rte_port_sched_reader_ops; + +/** sched_writer port parameters */ +struct rte_port_sched_writer_params { + /** Underlying pre-initialized rte_sched_port */ + struct rte_sched_port *sched; + + /** Recommended burst size. The actual burst size can be bigger or + smaller than this value. */ + uint32_t tx_burst_sz; +}; + +/** sched_writer port operations */ +extern struct rte_port_out_ops rte_port_sched_writer_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_source_sink.c b/lib/librte_port/rte_port_source_sink.c new file mode 100644 index 00000000..056c9756 --- /dev/null +++ b/lib/librte_port/rte_port_source_sink.c @@ -0,0 +1,667 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include +#include + +#ifdef RTE_NEXT_ABI + +#ifdef RTE_PORT_PCAP +#include +#include +#endif + +#else +#undef RTE_PORT_PCAP +#endif + +#include "rte_port_source_sink.h" + +/* + * Port SOURCE + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val) \ + port->stats.n_pkts_in += val +#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val) \ + port->stats.n_pkts_drop += val + +#else + +#define RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SOURCE_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_source { + struct rte_port_in_stats stats; + + struct rte_mempool *mempool; + + /* PCAP buffers and indices */ + uint8_t **pkts; + uint8_t *pkt_buff; + uint32_t *pkt_len; + uint32_t n_pkts; + uint32_t pkt_index; +}; + +#ifdef RTE_NEXT_ABI + +#ifdef RTE_PORT_PCAP + +static int +pcap_source_load(struct rte_port_source *port, + const char *file_name, + uint32_t n_bytes_per_pkt, + int socket_id) +{ + uint32_t n_pkts = 0; + uint32_t i; + uint32_t *pkt_len_aligns = NULL; + size_t total_buff_len = 0; + pcap_t *pcap_handle; + char pcap_errbuf[PCAP_ERRBUF_SIZE]; + uint32_t max_len; + struct pcap_pkthdr pcap_hdr; + const uint8_t *pkt; + uint8_t *buff = NULL; + uint32_t pktmbuf_maxlen = (uint32_t) + (rte_pktmbuf_data_room_size(port->mempool) - + RTE_PKTMBUF_HEADROOM); + + if (n_bytes_per_pkt == 0) + max_len = pktmbuf_maxlen; + else + max_len = RTE_MIN(n_bytes_per_pkt, pktmbuf_maxlen); + + /* first time open, get packet number */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + while ((pkt = pcap_next(pcap_handle, &pcap_hdr)) != NULL) + n_pkts++; + + pcap_close(pcap_handle); + + port->pkt_len = rte_zmalloc_socket("PCAP", + (sizeof(*port->pkt_len) * n_pkts), 0, socket_id); + if (port->pkt_len == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + pkt_len_aligns = rte_malloc("PCAP", + (sizeof(*pkt_len_aligns) * n_pkts), 0); + if (pkt_len_aligns == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + port->pkts = rte_zmalloc_socket("PCAP", + (sizeof(*port->pkts) * n_pkts), 0, socket_id); + if (port->pkts == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + /* open 2nd time, get pkt_len */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + for (i = 0; i < n_pkts; i++) { + pkt = pcap_next(pcap_handle, &pcap_hdr); + port->pkt_len[i] = RTE_MIN(max_len, pcap_hdr.len); + pkt_len_aligns[i] = RTE_CACHE_LINE_ROUNDUP( + port->pkt_len[i]); + total_buff_len += pkt_len_aligns[i]; + } + + pcap_close(pcap_handle); + + /* allocate a big trunk of data for pcap file load */ + buff = rte_zmalloc_socket("PCAP", + total_buff_len, 0, socket_id); + if (buff == NULL) { + RTE_LOG(ERR, PORT, "No enough memory\n"); + goto error_exit; + } + + port->pkt_buff = buff; + + /* open file one last time to copy the pkt content */ + pcap_handle = pcap_open_offline(file_name, pcap_errbuf); + if (pcap_handle == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "'%s' for reading\n", file_name); + goto error_exit; + } + + for (i = 0; i < n_pkts; i++) { + pkt = pcap_next(pcap_handle, &pcap_hdr); + rte_memcpy(buff, pkt, port->pkt_len[i]); + port->pkts[i] = buff; + buff += pkt_len_aligns[i]; + } + + pcap_close(pcap_handle); + + port->n_pkts = n_pkts; + + rte_free(pkt_len_aligns); + + RTE_LOG(INFO, PORT, "Successfully load pcap file " + "'%s' with %u pkts\n", + file_name, port->n_pkts); + + return 0; + +error_exit: + if (pkt_len_aligns) + rte_free(pkt_len_aligns); + if (port->pkt_len) + rte_free(port->pkt_len); + if (port->pkts) + rte_free(port->pkts); + if (port->pkt_buff) + rte_free(port->pkt_buff); + + return -1; +} + +#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id) \ + pcap_source_load(port, file_name, n_bytes, socket_id) + +#else /* RTE_PORT_PCAP */ + +#define PCAP_SOURCE_LOAD(port, file_name, n_bytes, socket_id) \ +({ \ + int _ret = 0; \ + \ + if (file_name) { \ + RTE_LOG(ERR, PORT, "Source port field " \ + "\"file_name\" is not NULL.\n"); \ + _ret = -1; \ + } \ + \ + _ret; \ +}) + +#endif /* RTE_PORT_PCAP */ + +#endif /* RTE_NEXT_ABI */ + +static void * +rte_port_source_create(void *params, int socket_id) +{ + struct rte_port_source_params *p = + (struct rte_port_source_params *) params; + struct rte_port_source *port; + + /* Check input arguments*/ + if ((p == NULL) || (p->mempool == NULL)) { + RTE_LOG(ERR, PORT, "%s: Invalid params\n", __func__); + return NULL; + } + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + /* Initialization */ + port->mempool = (struct rte_mempool *) p->mempool; + +#ifdef RTE_NEXT_ABI + + if (p->file_name) { + int status = PCAP_SOURCE_LOAD(port, p->file_name, + p->n_bytes_per_pkt, socket_id); + + if (status < 0) { + rte_free(port); + port = NULL; + } + } + +#endif + + return port; +} + +static int +rte_port_source_free(void *port) +{ + struct rte_port_source *p = + (struct rte_port_source *)port; + + /* Check input parameters */ + if (p == NULL) + return 0; + + if (p->pkt_len) + rte_free(p->pkt_len); + if (p->pkts) + rte_free(p->pkts); + if (p->pkt_buff) + rte_free(p->pkt_buff); + + rte_free(p); + + return 0; +} + +static int +rte_port_source_rx(void *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + struct rte_port_source *p = (struct rte_port_source *) port; + uint32_t i; + + if (rte_mempool_get_bulk(p->mempool, (void **) pkts, n_pkts) != 0) + return 0; + + for (i = 0; i < n_pkts; i++) { + rte_mbuf_refcnt_set(pkts[i], 1); + rte_pktmbuf_reset(pkts[i]); + } + + if (p->pkt_buff != NULL) { + for (i = 0; i < n_pkts; i++) { + uint8_t *pkt_data = rte_pktmbuf_mtod(pkts[i], + uint8_t *); + + rte_memcpy(pkt_data, p->pkts[p->pkt_index], + p->pkt_len[p->pkt_index]); + pkts[i]->data_len = p->pkt_len[p->pkt_index]; + pkts[i]->pkt_len = pkts[i]->data_len; + + p->pkt_index++; + if (p->pkt_index >= p->n_pkts) + p->pkt_index = 0; + } + } + + RTE_PORT_SOURCE_STATS_PKTS_IN_ADD(p, n_pkts); + + return n_pkts; +} + +static int +rte_port_source_stats_read(void *port, + struct rte_port_in_stats *stats, int clear) +{ + struct rte_port_source *p = + (struct rte_port_source *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Port SINK + */ +#ifdef RTE_PORT_STATS_COLLECT + +#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val) \ + (port->stats.n_pkts_in += val) +#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val) \ + (port->stats.n_pkts_drop += val) + +#else + +#define RTE_PORT_SINK_STATS_PKTS_IN_ADD(port, val) +#define RTE_PORT_SINK_STATS_PKTS_DROP_ADD(port, val) + +#endif + +struct rte_port_sink { + struct rte_port_out_stats stats; + + /* PCAP dumper handle and pkts number */ + void *dumper; + uint32_t max_pkts; + uint32_t pkt_index; + uint32_t dump_finish; +}; + +#ifdef RTE_PORT_PCAP + +static int +pcap_sink_open(struct rte_port_sink *port, + const char *file_name, + uint32_t max_n_pkts) +{ + pcap_t *tx_pcap; + pcap_dumper_t *pcap_dumper; + + /** Open a dead pcap handler for opening dumper file */ + tx_pcap = pcap_open_dead(DLT_EN10MB, 65535); + if (tx_pcap == NULL) { + RTE_LOG(ERR, PORT, "Cannot open pcap dead handler\n"); + return -1; + } + + /* The dumper is created using the previous pcap_t reference */ + pcap_dumper = pcap_dump_open(tx_pcap, file_name); + if (pcap_dumper == NULL) { + RTE_LOG(ERR, PORT, "Failed to open pcap file " + "\"%s\" for writing\n", file_name); + return -1; + } + + port->dumper = pcap_dumper; + port->max_pkts = max_n_pkts; + port->pkt_index = 0; + port->dump_finish = 0; + + RTE_LOG(INFO, PORT, "Ready to dump packets to file \"%s\"\n", + file_name); + + return 0; +} + +static void +pcap_sink_write_pkt(struct rte_port_sink *port, struct rte_mbuf *mbuf) +{ + uint8_t *pcap_dumper = (uint8_t *)(port->dumper); + struct pcap_pkthdr pcap_hdr; + uint8_t jumbo_pkt_buf[ETHER_MAX_JUMBO_FRAME_LEN]; + uint8_t *pkt; + + /* Maximum num packets already reached */ + if (port->dump_finish) + return; + + pkt = rte_pktmbuf_mtod(mbuf, uint8_t *); + + pcap_hdr.len = mbuf->pkt_len; + pcap_hdr.caplen = pcap_hdr.len; + gettimeofday(&(pcap_hdr.ts), NULL); + + if (mbuf->nb_segs > 1) { + struct rte_mbuf *jumbo_mbuf; + uint32_t pkt_index = 0; + + /* if packet size longer than ETHER_MAX_JUMBO_FRAME_LEN, + * ignore it. + */ + if (mbuf->pkt_len > ETHER_MAX_JUMBO_FRAME_LEN) + return; + + for (jumbo_mbuf = mbuf; jumbo_mbuf != NULL; + jumbo_mbuf = jumbo_mbuf->next) { + rte_memcpy(&jumbo_pkt_buf[pkt_index], + rte_pktmbuf_mtod(jumbo_mbuf, uint8_t *), + jumbo_mbuf->data_len); + pkt_index += jumbo_mbuf->data_len; + } + + jumbo_pkt_buf[pkt_index] = '\0'; + + pkt = jumbo_pkt_buf; + } + + pcap_dump(pcap_dumper, &pcap_hdr, pkt); + + port->pkt_index++; + + if ((port->max_pkts != 0) && (port->pkt_index >= port->max_pkts)) { + port->dump_finish = 1; + RTE_LOG(INFO, PORT, "Dumped %u packets to file\n", + port->pkt_index); + } + +} + +#define PCAP_SINK_OPEN(port, file_name, max_n_pkts) \ + pcap_sink_open(port, file_name, max_n_pkts) + +#define PCAP_SINK_WRITE_PKT(port, mbuf) \ + pcap_sink_write_pkt(port, mbuf) + +#define PCAP_SINK_FLUSH_PKT(dumper) \ +do { \ + if (dumper) \ + pcap_dump_flush((pcap_dumper_t *)dumper); \ +} while (0) + +#define PCAP_SINK_CLOSE(dumper) \ +do { \ + if (dumper) \ + pcap_dump_close((pcap_dumper_t *)dumper); \ +} while (0) + +#else + +#define PCAP_SINK_OPEN(port, file_name, max_n_pkts) \ +({ \ + int _ret = 0; \ + \ + if (file_name) { \ + RTE_LOG(ERR, PORT, "Sink port field " \ + "\"file_name\" is not NULL.\n"); \ + _ret = -1; \ + } \ + \ + _ret; \ +}) + +#define PCAP_SINK_WRITE_PKT(port, mbuf) {} + +#define PCAP_SINK_FLUSH_PKT(dumper) + +#define PCAP_SINK_CLOSE(dumper) + +#endif + +static void * +rte_port_sink_create(void *params, int socket_id) +{ + struct rte_port_sink *port; + struct rte_port_sink_params *p = params; + + /* Memory allocation */ + port = rte_zmalloc_socket("PORT", sizeof(*port), + RTE_CACHE_LINE_SIZE, socket_id); + if (port == NULL) { + RTE_LOG(ERR, PORT, "%s: Failed to allocate port\n", __func__); + return NULL; + } + + if (!p) + return port; + + if (p->file_name) { + int status = PCAP_SINK_OPEN(port, p->file_name, + p->max_n_pkts); + + if (status < 0) { + rte_free(port); + port = NULL; + } + } + + return port; +} + +static int +rte_port_sink_tx(void *port, struct rte_mbuf *pkt) +{ + struct rte_port_sink *p = (struct rte_port_sink *) port; + + RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1); + if (p->dumper != NULL) + PCAP_SINK_WRITE_PKT(p, pkt); + rte_pktmbuf_free(pkt); + RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1); + + return 0; +} + +static int +rte_port_sink_tx_bulk(void *port, struct rte_mbuf **pkts, + uint64_t pkts_mask) +{ + struct rte_port_sink *p = (struct rte_port_sink *) port; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, n_pkts); + RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, n_pkts); + + if (p->dumper) { + for (i = 0; i < n_pkts; i++) + PCAP_SINK_WRITE_PKT(p, pkts[i]); + } + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + + rte_pktmbuf_free(pkt); + } + + } else { + if (p->dumper) { + uint64_t dump_pkts_mask = pkts_mask; + uint32_t pkt_index; + + for ( ; dump_pkts_mask; ) { + pkt_index = __builtin_ctzll( + dump_pkts_mask); + PCAP_SINK_WRITE_PKT(p, pkts[pkt_index]); + dump_pkts_mask &= ~(1LLU << pkt_index); + } + } + + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + + RTE_PORT_SINK_STATS_PKTS_IN_ADD(p, 1); + RTE_PORT_SINK_STATS_PKTS_DROP_ADD(p, 1); + rte_pktmbuf_free(pkt); + pkts_mask &= ~pkt_mask; + } + } + + return 0; +} + +static int +rte_port_sink_flush(void *port) +{ + struct rte_port_sink *p = + (struct rte_port_sink *)port; + + if (p == NULL) + return 0; + + PCAP_SINK_FLUSH_PKT(p->dumper); + + return 0; +} + +static int +rte_port_sink_free(void *port) +{ + struct rte_port_sink *p = + (struct rte_port_sink *)port; + + if (p == NULL) + return 0; + + PCAP_SINK_CLOSE(p->dumper); + + rte_free(p); + + return 0; +} + +static int +rte_port_sink_stats_read(void *port, struct rte_port_out_stats *stats, + int clear) +{ + struct rte_port_sink *p = + (struct rte_port_sink *) port; + + if (stats != NULL) + memcpy(stats, &p->stats, sizeof(p->stats)); + + if (clear) + memset(&p->stats, 0, sizeof(p->stats)); + + return 0; +} + +/* + * Summary of port operations + */ +struct rte_port_in_ops rte_port_source_ops = { + .f_create = rte_port_source_create, + .f_free = rte_port_source_free, + .f_rx = rte_port_source_rx, + .f_stats = rte_port_source_stats_read, +}; + +struct rte_port_out_ops rte_port_sink_ops = { + .f_create = rte_port_sink_create, + .f_free = rte_port_sink_free, + .f_tx = rte_port_sink_tx, + .f_tx_bulk = rte_port_sink_tx_bulk, + .f_flush = rte_port_sink_flush, + .f_stats = rte_port_sink_stats_read, +}; diff --git a/lib/librte_port/rte_port_source_sink.h b/lib/librte_port/rte_port_source_sink.h new file mode 100644 index 00000000..917abe4f --- /dev/null +++ b/lib/librte_port/rte_port_source_sink.h @@ -0,0 +1,90 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_PORT_SOURCE_SINK_H__ +#define __INCLUDE_RTE_PORT_SOURCE_SINK_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Port Source/Sink + * + * source: input port that can be used to generate packets + * sink: output port that drops all packets written to it + * + ***/ + +#include "rte_port.h" + +/** source port parameters */ +struct rte_port_source_params { + /** Pre-initialized buffer pool */ + struct rte_mempool *mempool; +#ifdef RTE_NEXT_ABI + + /** The full path of the pcap file to read packets from */ + char *file_name; + /** The number of bytes to be read from each packet in the + * pcap file. If this value is 0, the whole packet is read; + * if it is bigger than packet size, the generated packets + * will contain the whole packet */ + uint32_t n_bytes_per_pkt; + +#endif +}; + +/** source port operations */ +extern struct rte_port_in_ops rte_port_source_ops; + +/** sink port parameters */ +struct rte_port_sink_params { + /** The full path of the pcap file to write the packets to */ + char *file_name; + /** The maximum number of packets write to the pcap file. + * If this value is 0, the "infinite" write will be carried + * out. + */ + uint32_t max_n_pkts; +}; + +/** sink port operations */ +extern struct rte_port_out_ops rte_port_sink_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_port/rte_port_version.map b/lib/librte_port/rte_port_version.map new file mode 100644 index 00000000..7a0b34d0 --- /dev/null +++ b/lib/librte_port/rte_port_version.map @@ -0,0 +1,37 @@ +DPDK_2.0 { + global: + + rte_port_ethdev_reader_ops; + rte_port_ethdev_writer_ops; + rte_port_ring_reader_ipv4_frag_ops; + rte_port_ring_reader_ops; + rte_port_ring_reader_ops; + rte_port_ring_writer_ipv4_ras_ops; + rte_port_ring_writer_ops; + rte_port_ring_writer_ops; + rte_port_sched_reader_ops; + rte_port_sched_writer_ops; + rte_port_sink_ops; + rte_port_source_ops; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_port_ethdev_writer_nodrop_ops; + rte_port_ring_reader_ipv6_frag_ops; + rte_port_ring_writer_ipv6_ras_ops; + rte_port_ring_writer_nodrop_ops; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_port_ring_multi_reader_ops; + rte_port_ring_multi_writer_ops; + rte_port_ring_multi_writer_nodrop_ops; + +} DPDK_2.1; diff --git a/lib/librte_power/Makefile b/lib/librte_power/Makefile new file mode 100644 index 00000000..cee95cd8 --- /dev/null +++ b/lib/librte_power/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_power.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -fno-strict-aliasing + +EXPORT_MAP := rte_power_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_POWER) := rte_power.c rte_power_acpi_cpufreq.c +SRCS-$(CONFIG_RTE_LIBRTE_POWER) += rte_power_kvm_vm.c guest_channel.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_POWER)-include := rte_power.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_POWER) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_power/channel_commands.h b/lib/librte_power/channel_commands.h new file mode 100644 index 00000000..de6d9269 --- /dev/null +++ b/lib/librte_power/channel_commands.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef CHANNEL_COMMANDS_H_ +#define CHANNEL_COMMANDS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/* Maximum number of CPUs */ +#define CHANNEL_CMDS_MAX_CPUS 64 +#if CHANNEL_CMDS_MAX_CPUS > 64 +#error Maximum number of cores is 64, overflow is guaranteed to \ + cause problems with VM Power Management +#endif + +/* Maximum number of channels per VM */ +#define CHANNEL_CMDS_MAX_VM_CHANNELS 64 + +/* Valid Commands */ +#define CPU_POWER 1 +#define CPU_POWER_CONNECT 2 + +/* CPU Power Command Scaling */ +#define CPU_POWER_SCALE_UP 1 +#define CPU_POWER_SCALE_DOWN 2 +#define CPU_POWER_SCALE_MAX 3 +#define CPU_POWER_SCALE_MIN 4 + +struct channel_packet { + uint64_t resource_id; /**< core_num, device */ + uint32_t unit; /**< scale down/up/min/max */ + uint32_t command; /**< Power, IO, etc */ +}; + + +#ifdef __cplusplus +} +#endif + +#endif /* CHANNEL_COMMANDS_H_ */ diff --git a/lib/librte_power/guest_channel.c b/lib/librte_power/guest_channel.c new file mode 100644 index 00000000..d6b6d0aa --- /dev/null +++ b/lib/librte_power/guest_channel.c @@ -0,0 +1,161 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +#include "guest_channel.h" +#include "channel_commands.h" + +#define RTE_LOGTYPE_GUEST_CHANNEL RTE_LOGTYPE_USER1 + +static int global_fds[RTE_MAX_LCORE]; + +int +guest_channel_host_connect(const char *path, unsigned lcore_id) +{ + int flags, ret; + struct channel_packet pkt; + char fd_path[PATH_MAX]; + int fd = -1; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n", + lcore_id, RTE_MAX_LCORE-1); + return -1; + } + /* check if path is already open */ + if (global_fds[lcore_id] != 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is already open with fd %d\n", + lcore_id, global_fds[lcore_id]); + return -1; + } + + snprintf(fd_path, PATH_MAX, "%s.%u", path, lcore_id); + RTE_LOG(INFO, GUEST_CHANNEL, "Opening channel '%s' for lcore %u\n", + fd_path, lcore_id); + fd = open(fd_path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Unable to to connect to '%s' with error " + "%s\n", fd_path, strerror(errno)); + return -1; + } + + flags = fcntl(fd, F_GETFL, 0); + if (flags < 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Failed on fcntl get flags for file %s\n", + fd_path); + goto error; + } + + flags |= O_NONBLOCK; + if (fcntl(fd, F_SETFL, flags) < 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Failed on setting non-blocking mode for " + "file %s", fd_path); + goto error; + } + /* QEMU needs a delay after connection */ + sleep(1); + + /* Send a test packet, this command is ignored by the host, but a successful + * send indicates that the host endpoint is monitoring. + */ + pkt.command = CPU_POWER_CONNECT; + global_fds[lcore_id] = fd; + ret = guest_channel_send_msg(&pkt, lcore_id); + if (ret != 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Error on channel '%s' communications " + "test: %s\n", fd_path, strerror(ret)); + goto error; + } + RTE_LOG(INFO, GUEST_CHANNEL, "Channel '%s' is now connected\n", fd_path); + return 0; +error: + close(fd); + global_fds[lcore_id] = 0; + return -1; +} + +int +guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id) +{ + int ret, buffer_len = sizeof(*pkt); + void *buffer = pkt; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n", + lcore_id, RTE_MAX_LCORE-1); + return -1; + } + + if (global_fds[lcore_id] == 0) { + RTE_LOG(ERR, GUEST_CHANNEL, "Channel is not connected\n"); + return -1; + } + while (buffer_len > 0) { + ret = write(global_fds[lcore_id], buffer, buffer_len); + if (ret == buffer_len) + return 0; + if (ret == -1) { + if (errno == EINTR) + continue; + return errno; + } + buffer = (char *)buffer + ret; + buffer_len -= ret; + } + return 0; +} + +void +guest_channel_host_disconnect(unsigned lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, GUEST_CHANNEL, "Channel(%u) is out of range 0...%d\n", + lcore_id, RTE_MAX_LCORE-1); + return; + } + if (global_fds[lcore_id] == 0) + return; + close(global_fds[lcore_id]); + global_fds[lcore_id] = 0; +} diff --git a/lib/librte_power/guest_channel.h b/lib/librte_power/guest_channel.h new file mode 100644 index 00000000..9e18af52 --- /dev/null +++ b/lib/librte_power/guest_channel.h @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _GUEST_CHANNEL_H +#define _GUEST_CHANNEL_H + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Connect to the Virtio-Serial VM end-point located in path. It is + * thread safe for unique lcore_ids. This function must be only called once from + * each lcore. + * + * @param path + * The path to the serial device on the filesystem + * @param lcore_id + * lcore_id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int guest_channel_host_connect(const char *path, unsigned lcore_id); + +/** + * Disconnect from an already connected Virtio-Serial Endpoint. + * + * + * @param lcore_id + * lcore_id. + * + */ +void guest_channel_host_disconnect(unsigned lcore_id); + +/** + * Send a message contained in pkt over the Virtio-Serial to the host endpoint. + * + * @param pkt + * Pointer to a populated struct guest_agent_pkt + * + * @param lcore_id + * lcore_id. + * + * @return + * - 0 on success. + * - Negative on channel not connected. + * - errno on write to channel error. + */ +int guest_channel_send_msg(struct channel_packet *pkt, unsigned lcore_id); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/rte_power.c b/lib/librte_power/rte_power.c new file mode 100644 index 00000000..998ed1c9 --- /dev/null +++ b/lib/librte_power/rte_power.c @@ -0,0 +1,143 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "rte_power.h" +#include "rte_power_acpi_cpufreq.h" +#include "rte_power_kvm_vm.h" +#include "rte_power_common.h" + +enum power_management_env global_default_env = PM_ENV_NOT_SET; + +volatile uint32_t global_env_cfg_status = 0; + +/* function pointers */ +rte_power_freqs_t rte_power_freqs = NULL; +rte_power_get_freq_t rte_power_get_freq = NULL; +rte_power_set_freq_t rte_power_set_freq = NULL; +rte_power_freq_change_t rte_power_freq_up = NULL; +rte_power_freq_change_t rte_power_freq_down = NULL; +rte_power_freq_change_t rte_power_freq_max = NULL; +rte_power_freq_change_t rte_power_freq_min = NULL; + +int +rte_power_set_env(enum power_management_env env) +{ + if (rte_atomic32_cmpset(&global_env_cfg_status, 0, 1) == 0) { + return 0; + } + if (env == PM_ENV_ACPI_CPUFREQ) { + rte_power_freqs = rte_power_acpi_cpufreq_freqs; + rte_power_get_freq = rte_power_acpi_cpufreq_get_freq; + rte_power_set_freq = rte_power_acpi_cpufreq_set_freq; + rte_power_freq_up = rte_power_acpi_cpufreq_freq_up; + rte_power_freq_down = rte_power_acpi_cpufreq_freq_down; + rte_power_freq_min = rte_power_acpi_cpufreq_freq_min; + rte_power_freq_max = rte_power_acpi_cpufreq_freq_max; + } else if (env == PM_ENV_KVM_VM) { + rte_power_freqs = rte_power_kvm_vm_freqs; + rte_power_get_freq = rte_power_kvm_vm_get_freq; + rte_power_set_freq = rte_power_kvm_vm_set_freq; + rte_power_freq_up = rte_power_kvm_vm_freq_up; + rte_power_freq_down = rte_power_kvm_vm_freq_down; + rte_power_freq_min = rte_power_kvm_vm_freq_min; + rte_power_freq_max = rte_power_kvm_vm_freq_max; + } else { + RTE_LOG(ERR, POWER, "Invalid Power Management Environment(%d) set\n", + env); + rte_power_unset_env(); + return -1; + } + global_default_env = env; + return 0; + +} + +void +rte_power_unset_env(void) +{ + if (rte_atomic32_cmpset(&global_env_cfg_status, 1, 0) != 0) + global_default_env = PM_ENV_NOT_SET; +} + +enum power_management_env +rte_power_get_env(void) { + return global_default_env; +} + +int +rte_power_init(unsigned lcore_id) +{ + int ret = -1; + + if (global_default_env == PM_ENV_ACPI_CPUFREQ) { + return rte_power_acpi_cpufreq_init(lcore_id); + } + if (global_default_env == PM_ENV_KVM_VM) { + return rte_power_kvm_vm_init(lcore_id); + } + /* Auto detect Environment */ + RTE_LOG(INFO, POWER, "Attempting to initialise ACPI cpufreq power " + "management...\n"); + ret = rte_power_acpi_cpufreq_init(lcore_id); + if (ret == 0) { + rte_power_set_env(PM_ENV_ACPI_CPUFREQ); + goto out; + } + + RTE_LOG(INFO, POWER, "Attempting to initialise VM power management...\n"); + ret = rte_power_kvm_vm_init(lcore_id); + if (ret == 0) { + rte_power_set_env(PM_ENV_KVM_VM); + goto out; + } + RTE_LOG(ERR, POWER, "Unable to set Power Management Environment for lcore " + "%u\n", lcore_id); +out: + return ret; +} + +int +rte_power_exit(unsigned lcore_id) +{ + if (global_default_env == PM_ENV_ACPI_CPUFREQ) + return rte_power_acpi_cpufreq_exit(lcore_id); + if (global_default_env == PM_ENV_KVM_VM) + return rte_power_kvm_vm_exit(lcore_id); + + RTE_LOG(ERR, POWER, "Environment has not been set, unable to exit " + "gracefully\n"); + return -1; + +} diff --git a/lib/librte_power/rte_power.h b/lib/librte_power/rte_power.h new file mode 100644 index 00000000..67e0ec02 --- /dev/null +++ b/lib/librte_power/rte_power.h @@ -0,0 +1,243 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_POWER_H +#define _RTE_POWER_H + +/** + * @file + * RTE Power Management + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/* Power Management Environment State */ +enum power_management_env {PM_ENV_NOT_SET, PM_ENV_ACPI_CPUFREQ, PM_ENV_KVM_VM}; + +/** + * Set the default power management implementation. If this is not called prior + * to rte_power_init(), then auto-detect of the environment will take place. + * It is not thread safe. + * + * @param env + * env. The environment in which to initialise Power Management for. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_set_env(enum power_management_env env); + +/** + * Unset the global environment configuration. + * This can only be called after all threads have completed. + */ +void rte_power_unset_env(void); + +/** + * Get the default power management implementation. + * + * @return + * power_management_env The configured environment. + */ +enum power_management_env rte_power_get_env(void); + +/** + * Initialize power management for a specific lcore. If rte_power_set_env() has + * not been called then an auto-detect of the environment will start and + * initialise the corresponding resources. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_init(unsigned lcore_id); + +/** + * Exit power management on a specific lcore. This will call the environment + * dependent exit function. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_exit(unsigned lcore_id); + +/** + * Get the available frequencies of a specific lcore. + * Function pointer definition. Review each environments + * specific documentation for usage. + * + * @param lcore_id + * lcore id. + * @param freqs + * The buffer array to save the frequencies. + * @param num + * The number of frequencies to get. + * + * @return + * The number of available frequencies. + */ +typedef uint32_t (*rte_power_freqs_t)(unsigned lcore_id, uint32_t *freqs, + uint32_t num); + +extern rte_power_freqs_t rte_power_freqs; + +/** + * Return the current index of available frequencies of a specific lcore. + * Function pointer definition. Review each environments + * specific documentation for usage. + * + * @param lcore_id + * lcore id. + * + * @return + * The current index of available frequencies. + */ +typedef uint32_t (*rte_power_get_freq_t)(unsigned lcore_id); + +extern rte_power_get_freq_t rte_power_get_freq; + +/** + * Set the new frequency for a specific lcore by indicating the index of + * available frequencies. + * Function pointer definition. Review each environments + * specific documentation for usage. + * + * @param lcore_id + * lcore id. + * @param index + * The index of available frequencies. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +typedef int (*rte_power_set_freq_t)(unsigned lcore_id, uint32_t index); + +extern rte_power_set_freq_t rte_power_set_freq; + +/** + * Function pointer definition for generic frequency change functions. Review + * each environments specific documentation for usage. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +typedef int (*rte_power_freq_change_t)(unsigned lcore_id); + +/** + * Scale up the frequency of a specific lcore according to the available + * frequencies. + * Review each environments specific documentation for usage. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +extern rte_power_freq_change_t rte_power_freq_up; + +/** + * Scale down the frequency of a specific lcore according to the available + * frequencies. + * Review each environments specific documentation for usage. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ + +extern rte_power_freq_change_t rte_power_freq_down; + +/** + * Scale up the frequency of a specific lcore to the highest according to the + * available frequencies. + * Review each environments specific documentation for usage. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +extern rte_power_freq_change_t rte_power_freq_max; + +/** + * Scale down the frequency of a specific lcore to the lowest according to the + * available frequencies. + * Review each environments specific documentation for usage.. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +extern rte_power_freq_change_t rte_power_freq_min; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/rte_power_acpi_cpufreq.c b/lib/librte_power/rte_power_acpi_cpufreq.c new file mode 100644 index 00000000..a56c9b59 --- /dev/null +++ b/lib/librte_power/rte_power_acpi_cpufreq.c @@ -0,0 +1,545 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "rte_power_acpi_cpufreq.h" +#include "rte_power_common.h" + +#ifdef RTE_LIBRTE_POWER_DEBUG +#define POWER_DEBUG_TRACE(fmt, args...) do { \ + RTE_LOG(ERR, POWER, "%s: " fmt, __func__, ## args); \ +} while (0) +#else +#define POWER_DEBUG_TRACE(fmt, args...) +#endif + +#define FOPEN_OR_ERR_RET(f, retval) do { \ + if ((f) == NULL) { \ + RTE_LOG(ERR, POWER, "File not openned\n"); \ + return retval; \ + } \ +} while (0) + +#define FOPS_OR_NULL_GOTO(ret, label) do { \ + if ((ret) == NULL) { \ + RTE_LOG(ERR, POWER, "fgets returns nothing\n"); \ + goto label; \ + } \ +} while (0) + +#define FOPS_OR_ERR_GOTO(ret, label) do { \ + if ((ret) < 0) { \ + RTE_LOG(ERR, POWER, "File operations failed\n"); \ + goto label; \ + } \ +} while (0) + +#define STR_SIZE 1024 +#define POWER_CONVERT_TO_DECIMAL 10 + +#define POWER_GOVERNOR_USERSPACE "userspace" +#define POWER_SYSFILE_GOVERNOR \ + "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_governor" +#define POWER_SYSFILE_AVAIL_FREQ \ + "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_available_frequencies" +#define POWER_SYSFILE_SETSPEED \ + "/sys/devices/system/cpu/cpu%u/cpufreq/scaling_setspeed" + +enum power_state { + POWER_IDLE = 0, + POWER_ONGOING, + POWER_USED, + POWER_UNKNOWN +}; + +/** + * Power info per lcore. + */ +struct rte_power_info { + unsigned lcore_id; /**< Logical core id */ + uint32_t freqs[RTE_MAX_LCORE_FREQS]; /**< Frequency array */ + uint32_t nb_freqs; /**< number of available freqs */ + FILE *f; /**< FD of scaling_setspeed */ + char governor_ori[32]; /**< Original governor name */ + uint32_t curr_idx; /**< Freq index in freqs array */ + volatile uint32_t state; /**< Power in use state */ +} __rte_cache_aligned; + +static struct rte_power_info lcore_power_info[RTE_MAX_LCORE]; + +/** + * It is to set specific freq for specific logical core, according to the index + * of supported frequencies. + */ +static int +set_freq_internal(struct rte_power_info *pi, uint32_t idx) +{ + if (idx >= RTE_MAX_LCORE_FREQS || idx >= pi->nb_freqs) { + RTE_LOG(ERR, POWER, "Invalid frequency index %u, which " + "should be less than %u\n", idx, pi->nb_freqs); + return -1; + } + + /* Check if it is the same as current */ + if (idx == pi->curr_idx) + return 0; + + POWER_DEBUG_TRACE("Freqency[%u] %u to be set for lcore %u\n", + idx, pi->freqs[idx], pi->lcore_id); + if (fseek(pi->f, 0, SEEK_SET) < 0) { + RTE_LOG(ERR, POWER, "Fail to set file position indicator to 0 " + "for setting frequency for lcore %u\n", pi->lcore_id); + return -1; + } + if (fprintf(pi->f, "%u", pi->freqs[idx]) < 0) { + RTE_LOG(ERR, POWER, "Fail to write new frequency for " + "lcore %u\n", pi->lcore_id); + return -1; + } + fflush(pi->f); + pi->curr_idx = idx; + + return 1; +} + +/** + * It is to check the current scaling governor by reading sys file, and then + * set it into 'userspace' if it is not by writing the sys file. The original + * governor will be saved for rolling back. + */ +static int +power_set_governor_userspace(struct rte_power_info *pi) +{ + FILE *f; + int ret = -1; + char buf[BUFSIZ]; + char fullpath[PATH_MAX]; + char *s; + int val; + + snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR, + pi->lcore_id); + f = fopen(fullpath, "rw+"); + FOPEN_OR_ERR_RET(f, ret); + + s = fgets(buf, sizeof(buf), f); + FOPS_OR_NULL_GOTO(s, out); + + /* Check if current governor is userspace */ + if (strncmp(buf, POWER_GOVERNOR_USERSPACE, + sizeof(POWER_GOVERNOR_USERSPACE)) == 0) { + ret = 0; + POWER_DEBUG_TRACE("Power management governor of lcore %u is " + "already userspace\n", pi->lcore_id); + goto out; + } + /* Save the original governor */ + snprintf(pi->governor_ori, sizeof(pi->governor_ori), "%s", buf); + + /* Write 'userspace' to the governor */ + val = fseek(f, 0, SEEK_SET); + FOPS_OR_ERR_GOTO(val, out); + + val = fputs(POWER_GOVERNOR_USERSPACE, f); + FOPS_OR_ERR_GOTO(val, out); + + ret = 0; + RTE_LOG(INFO, POWER, "Power management governor of lcore %u has been " + "set to user space successfully\n", pi->lcore_id); +out: + fclose(f); + + return ret; +} + +/** + * It is to get the available frequencies of the specific lcore by reading the + * sys file. + */ +static int +power_get_available_freqs(struct rte_power_info *pi) +{ + FILE *f; + int ret = -1, i, count; + char *p; + char buf[BUFSIZ]; + char fullpath[PATH_MAX]; + char *freqs[RTE_MAX_LCORE_FREQS]; + char *s; + + snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_AVAIL_FREQ, + pi->lcore_id); + f = fopen(fullpath, "r"); + FOPEN_OR_ERR_RET(f, ret); + + s = fgets(buf, sizeof(buf), f); + FOPS_OR_NULL_GOTO(s, out); + + /* Strip the line break if there is */ + p = strchr(buf, '\n'); + if (p != NULL) + *p = 0; + + /* Split string into at most RTE_MAX_LCORE_FREQS frequencies */ + count = rte_strsplit(buf, sizeof(buf), freqs, + RTE_MAX_LCORE_FREQS, ' '); + if (count <= 0) { + RTE_LOG(ERR, POWER, "No available frequency in " + ""POWER_SYSFILE_AVAIL_FREQ"\n", pi->lcore_id); + goto out; + } + if (count >= RTE_MAX_LCORE_FREQS) { + RTE_LOG(ERR, POWER, "Too many available frequencies : %d\n", + count); + goto out; + } + + /* Store the available frequncies into power context */ + for (i = 0, pi->nb_freqs = 0; i < count; i++) { + POWER_DEBUG_TRACE("Lcore %u frequency[%d]: %s\n", pi->lcore_id, + i, freqs[i]); + pi->freqs[pi->nb_freqs++] = strtoul(freqs[i], &p, + POWER_CONVERT_TO_DECIMAL); + } + + ret = 0; + POWER_DEBUG_TRACE("%d frequencie(s) of lcore %u are available\n", + count, pi->lcore_id); +out: + fclose(f); + + return ret; +} + +/** + * It is to fopen the sys file for the future setting the lcore frequency. + */ +static int +power_init_for_setting_freq(struct rte_power_info *pi) +{ + FILE *f; + char fullpath[PATH_MAX]; + char buf[BUFSIZ]; + uint32_t i, freq; + char *s; + + snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_SETSPEED, + pi->lcore_id); + f = fopen(fullpath, "rw+"); + FOPEN_OR_ERR_RET(f, -1); + + s = fgets(buf, sizeof(buf), f); + FOPS_OR_NULL_GOTO(s, out); + + freq = strtoul(buf, NULL, POWER_CONVERT_TO_DECIMAL); + for (i = 0; i < pi->nb_freqs; i++) { + if (freq == pi->freqs[i]) { + pi->curr_idx = i; + pi->f = f; + return 0; + } + } + +out: + fclose(f); + + return -1; +} + +int +rte_power_acpi_cpufreq_init(unsigned lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n", + lcore_id, RTE_MAX_LCORE - 1U); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + if (rte_atomic32_cmpset(&(pi->state), POWER_IDLE, POWER_ONGOING) + == 0) { + RTE_LOG(INFO, POWER, "Power management of lcore %u is " + "in use\n", lcore_id); + return -1; + } + + pi->lcore_id = lcore_id; + /* Check and set the governor */ + if (power_set_governor_userspace(pi) < 0) { + RTE_LOG(ERR, POWER, "Cannot set governor of lcore %u to " + "userspace\n", lcore_id); + goto fail; + } + + /* Get the available frequencies */ + if (power_get_available_freqs(pi) < 0) { + RTE_LOG(ERR, POWER, "Cannot get available frequencies of " + "lcore %u\n", lcore_id); + goto fail; + } + + /* Init for setting lcore frequency */ + if (power_init_for_setting_freq(pi) < 0) { + RTE_LOG(ERR, POWER, "Cannot init for setting frequency for " + "lcore %u\n", lcore_id); + goto fail; + } + + /* Set freq to max by default */ + if (rte_power_acpi_cpufreq_freq_max(lcore_id) < 0) { + RTE_LOG(ERR, POWER, "Cannot set frequency of lcore %u " + "to max\n", lcore_id); + goto fail; + } + + RTE_LOG(INFO, POWER, "Initialized successfully for lcore %u " + "power manamgement\n", lcore_id); + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_USED); + + return 0; + +fail: + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN); + + return -1; +} + +/** + * It is to check the governor and then set the original governor back if + * needed by writing the the sys file. + */ +static int +power_set_governor_original(struct rte_power_info *pi) +{ + FILE *f; + int ret = -1; + char buf[BUFSIZ]; + char fullpath[PATH_MAX]; + char *s; + int val; + + snprintf(fullpath, sizeof(fullpath), POWER_SYSFILE_GOVERNOR, + pi->lcore_id); + f = fopen(fullpath, "rw+"); + FOPEN_OR_ERR_RET(f, ret); + + s = fgets(buf, sizeof(buf), f); + FOPS_OR_NULL_GOTO(s, out); + + /* Check if the governor to be set is the same as current */ + if (strncmp(buf, pi->governor_ori, sizeof(pi->governor_ori)) == 0) { + ret = 0; + POWER_DEBUG_TRACE("Power management governor of lcore %u " + "has already been set to %s\n", + pi->lcore_id, pi->governor_ori); + goto out; + } + + /* Write back the original governor */ + val = fseek(f, 0, SEEK_SET); + FOPS_OR_ERR_GOTO(val, out); + + val = fputs(pi->governor_ori, f); + FOPS_OR_ERR_GOTO(val, out); + + ret = 0; + RTE_LOG(INFO, POWER, "Power management governor of lcore %u " + "has been set back to %s successfully\n", + pi->lcore_id, pi->governor_ori); +out: + fclose(f); + + return ret; +} + +int +rte_power_acpi_cpufreq_exit(unsigned lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Lcore id %u can not exceeds %u\n", + lcore_id, RTE_MAX_LCORE - 1U); + return -1; + } + pi = &lcore_power_info[lcore_id]; + if (rte_atomic32_cmpset(&(pi->state), POWER_USED, POWER_ONGOING) + == 0) { + RTE_LOG(INFO, POWER, "Power management of lcore %u is " + "not used\n", lcore_id); + return -1; + } + + /* Close FD of setting freq */ + fclose(pi->f); + pi->f = NULL; + + /* Set the governor back to the original */ + if (power_set_governor_original(pi) < 0) { + RTE_LOG(ERR, POWER, "Cannot set the governor of %u back " + "to the original\n", lcore_id); + goto fail; + } + + RTE_LOG(INFO, POWER, "Power management of lcore %u has exited from " + "'userspace' mode and been set back to the " + "original\n", lcore_id); + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_IDLE); + + return 0; + +fail: + rte_atomic32_cmpset(&(pi->state), POWER_ONGOING, POWER_UNKNOWN); + + return -1; +} + +uint32_t +rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs, uint32_t num) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE || !freqs) { + RTE_LOG(ERR, POWER, "Invalid input parameter\n"); + return 0; + } + + pi = &lcore_power_info[lcore_id]; + if (num < pi->nb_freqs) { + RTE_LOG(ERR, POWER, "Buffer size is not enough\n"); + return 0; + } + rte_memcpy(freqs, pi->freqs, pi->nb_freqs * sizeof(uint32_t)); + + return pi->nb_freqs; +} + +uint32_t +rte_power_acpi_cpufreq_get_freq(unsigned lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return RTE_POWER_INVALID_FREQ_INDEX; + } + + return lcore_power_info[lcore_id].curr_idx; +} + +int +rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index) +{ + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + return set_freq_internal(&(lcore_power_info[lcore_id]), index); +} + +int +rte_power_acpi_cpufreq_freq_down(unsigned lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + if (pi->curr_idx + 1 == pi->nb_freqs) + return 0; + + /* Frequencies in the array are from high to low. */ + return set_freq_internal(pi, pi->curr_idx + 1); +} + +int +rte_power_acpi_cpufreq_freq_up(unsigned lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + if (pi->curr_idx == 0) + return 0; + + /* Frequencies in the array are from high to low. */ + return set_freq_internal(pi, pi->curr_idx - 1); +} + +int +rte_power_acpi_cpufreq_freq_max(unsigned lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + /* Frequencies in the array are from high to low. */ + return set_freq_internal(&lcore_power_info[lcore_id], 0); +} + +int +rte_power_acpi_cpufreq_freq_min(unsigned lcore_id) +{ + struct rte_power_info *pi; + + if (lcore_id >= RTE_MAX_LCORE) { + RTE_LOG(ERR, POWER, "Invalid lcore ID\n"); + return -1; + } + + pi = &lcore_power_info[lcore_id]; + + /* Frequencies in the array are from high to low. */ + return set_freq_internal(pi, pi->nb_freqs - 1); +} diff --git a/lib/librte_power/rte_power_acpi_cpufreq.h b/lib/librte_power/rte_power_acpi_cpufreq.h new file mode 100644 index 00000000..68578e9b --- /dev/null +++ b/lib/librte_power/rte_power_acpi_cpufreq.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_POWER_ACPI_CPUFREQ_H +#define _RTE_POWER_ACPI_CPUFREQ_H + +/** + * @file + * RTE Power Management via userspace ACPI cpufreq + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Initialize power management for a specific lcore. It will check and set the + * governor to userspace for the lcore, get the available frequencies, and + * prepare to set new lcore frequency. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_init(unsigned lcore_id); + +/** + * Exit power management on a specific lcore. It will set the governor to which + * is before initialized. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_exit(unsigned lcore_id); + +/** + * Get the available frequencies of a specific lcore. The return value will be + * the minimal one of the total number of available frequencies and the number + * of buffer. The index of available frequencies used in other interfaces + * should be in the range of 0 to this return value. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * @param freqs + * The buffer array to save the frequencies. + * @param num + * The number of frequencies to get. + * + * @return + * The number of available frequencies. + */ +uint32_t rte_power_acpi_cpufreq_freqs(unsigned lcore_id, uint32_t *freqs, + uint32_t num); + +/** + * Return the current index of available frequencies of a specific lcore. It + * will return 'RTE_POWER_INVALID_FREQ_INDEX = (~0)' if error. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * The current index of available frequencies. + */ +uint32_t rte_power_acpi_cpufreq_get_freq(unsigned lcore_id); + +/** + * Set the new frequency for a specific lcore by indicating the index of + * available frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * @param index + * The index of available frequencies. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_set_freq(unsigned lcore_id, uint32_t index); + +/** + * Scale up the frequency of a specific lcore according to the available + * frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_freq_up(unsigned lcore_id); + +/** + * Scale down the frequency of a specific lcore according to the available + * frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_freq_down(unsigned lcore_id); + +/** + * Scale up the frequency of a specific lcore to the highest according to the + * available frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency changed. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_freq_max(unsigned lcore_id); + +/** + * Scale down the frequency of a specific lcore to the lowest according to the + * available frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success with frequency changed. + * - 0 on success without frequency chnaged. + * - Negative on error. + */ +int rte_power_acpi_cpufreq_freq_min(unsigned lcore_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/rte_power_common.h b/lib/librte_power/rte_power_common.h new file mode 100644 index 00000000..64bd168f --- /dev/null +++ b/lib/librte_power/rte_power_common.h @@ -0,0 +1,39 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef RTE_POWER_COMMON_H_ +#define RTE_POWER_COMMON_H_ + +#define RTE_POWER_INVALID_FREQ_INDEX (~0) + +#endif /* RTE_POWER_COMMON_H_ */ diff --git a/lib/librte_power/rte_power_kvm_vm.c b/lib/librte_power/rte_power_kvm_vm.c new file mode 100644 index 00000000..7bb2774c --- /dev/null +++ b/lib/librte_power/rte_power_kvm_vm.c @@ -0,0 +1,135 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include + +#include "guest_channel.h" +#include "channel_commands.h" +#include "rte_power_kvm_vm.h" +#include "rte_power_common.h" + +#define FD_PATH "/dev/virtio-ports/virtio.serial.port.poweragent" + +static struct channel_packet pkt[CHANNEL_CMDS_MAX_VM_CHANNELS]; + + +int +rte_power_kvm_vm_init(unsigned lcore_id) +{ + if (lcore_id >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n", + lcore_id, CHANNEL_CMDS_MAX_VM_CHANNELS-1); + return -1; + } + pkt[lcore_id].command = CPU_POWER; + pkt[lcore_id].resource_id = lcore_id; + return guest_channel_host_connect(FD_PATH, lcore_id); +} + +int +rte_power_kvm_vm_exit(unsigned lcore_id) +{ + guest_channel_host_disconnect(lcore_id); + return 0; +} + +uint32_t +rte_power_kvm_vm_freqs(__attribute__((unused)) unsigned lcore_id, + __attribute__((unused)) uint32_t *freqs, + __attribute__((unused)) uint32_t num) +{ + RTE_LOG(ERR, POWER, "rte_power_freqs is not implemented " + "for Virtual Machine Power Management\n"); + return -ENOTSUP; +} + +uint32_t +rte_power_kvm_vm_get_freq(__attribute__((unused)) unsigned lcore_id) +{ + RTE_LOG(ERR, POWER, "rte_power_get_freq is not implemented " + "for Virtual Machine Power Management\n"); + return -ENOTSUP; +} + +int +rte_power_kvm_vm_set_freq(__attribute__((unused)) unsigned lcore_id, + __attribute__((unused)) uint32_t index) +{ + RTE_LOG(ERR, POWER, "rte_power_set_freq is not implemented " + "for Virtual Machine Power Management\n"); + return -ENOTSUP; +} + +static inline int +send_msg(unsigned lcore_id, uint32_t scale_direction) +{ + int ret; + + if (lcore_id >= CHANNEL_CMDS_MAX_VM_CHANNELS) { + RTE_LOG(ERR, POWER, "Core(%u) is out of range 0...%d\n", + lcore_id, CHANNEL_CMDS_MAX_VM_CHANNELS-1); + return -1; + } + pkt[lcore_id].unit = scale_direction; + ret = guest_channel_send_msg(&pkt[lcore_id], lcore_id); + if (ret == 0) + return 1; + RTE_LOG(DEBUG, POWER, "Error sending message: %s\n", strerror(ret)); + return -1; +} + +int +rte_power_kvm_vm_freq_up(unsigned lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_SCALE_UP); +} + +int +rte_power_kvm_vm_freq_down(unsigned lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_SCALE_DOWN); +} + +int +rte_power_kvm_vm_freq_max(unsigned lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_SCALE_MAX); +} + +int +rte_power_kvm_vm_freq_min(unsigned lcore_id) +{ + return send_msg(lcore_id, CPU_POWER_SCALE_MIN); +} diff --git a/lib/librte_power/rte_power_kvm_vm.h b/lib/librte_power/rte_power_kvm_vm.h new file mode 100644 index 00000000..dcbc878a --- /dev/null +++ b/lib/librte_power/rte_power_kvm_vm.h @@ -0,0 +1,179 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_POWER_KVM_VM_H +#define _RTE_POWER_KVM_VM_H + +/** + * @file + * RTE Power Management KVM VM + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Initialize power management for a specific lcore. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_init(unsigned lcore_id); + +/** + * Exit power management on a specific lcore. + * + * @param lcore_id + * lcore id. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_exit(unsigned lcore_id); + +/** + * Get the available frequencies of a specific lcore. + * It is not currently supported for VM Power Management. + * + * @param lcore_id + * lcore id. + * @param freqs + * The buffer array to save the frequencies. + * @param num + * The number of frequencies to get. + * + * @return + * -ENOTSUP + */ +uint32_t rte_power_kvm_vm_freqs(unsigned lcore_id, uint32_t *freqs, + uint32_t num); + +/** + * Return the current index of available frequencies of a specific lcore. + * It is not currently supported for VM Power Management. + * + * @param lcore_id + * lcore id. + * + * @return + * -ENOTSUP + */ +uint32_t rte_power_kvm_vm_get_freq(unsigned lcore_id); + +/** + * Set the new frequency for a specific lcore by indicating the index of + * available frequencies. + * It is not currently supported for VM Power Management. + * + * @param lcore_id + * lcore id. + * @param index + * The index of available frequencies. + * + * @return + * -ENOTSUP + */ +int rte_power_kvm_vm_set_freq(unsigned lcore_id, uint32_t index); + +/** + * Scale up the frequency of a specific lcore. This request is forwarded to the + * host monitor. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_freq_up(unsigned lcore_id); + +/** + * Scale down the frequency of a specific lcore according to the available + * frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_freq_down(unsigned lcore_id); + +/** + * Scale up the frequency of a specific lcore to the highest according to the + * available frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_freq_max(unsigned lcore_id); + +/** + * Scale down the frequency of a specific lcore to the lowest according to the + * available frequencies. + * It should be protected outside of this function for threadsafe. + * + * @param lcore_id + * lcore id. + * + * @return + * - 1 on success. + * - Negative on error. + */ +int rte_power_kvm_vm_freq_min(unsigned lcore_id); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_power/rte_power_version.map b/lib/librte_power/rte_power_version.map new file mode 100644 index 00000000..db75ff3e --- /dev/null +++ b/lib/librte_power/rte_power_version.map @@ -0,0 +1,18 @@ +DPDK_2.0 { + global: + + rte_power_exit; + rte_power_freq_down; + rte_power_freq_max; + rte_power_freq_min; + rte_power_freq_up; + rte_power_freqs; + rte_power_get_env; + rte_power_get_freq; + rte_power_init; + rte_power_set_env; + rte_power_set_freq; + rte_power_unset_env; + + local: *; +}; diff --git a/lib/librte_reorder/Makefile b/lib/librte_reorder/Makefile new file mode 100644 index 00000000..0c01de16 --- /dev/null +++ b/lib/librte_reorder/Makefile @@ -0,0 +1,54 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_reorder.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) + +EXPORT_MAP := rte_reorder_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_REORDER) := rte_reorder.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_REORDER)-include := rte_reorder.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_REORDER) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_reorder/rte_reorder.c b/lib/librte_reorder/rte_reorder.c new file mode 100644 index 00000000..010dff68 --- /dev/null +++ b/lib/librte_reorder/rte_reorder.c @@ -0,0 +1,411 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "rte_reorder.h" + +TAILQ_HEAD(rte_reorder_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_reorder_tailq = { + .name = "RTE_REORDER", +}; +EAL_REGISTER_TAILQ(rte_reorder_tailq) + +#define NO_FLAGS 0 +#define RTE_REORDER_PREFIX "RO_" +#define RTE_REORDER_NAMESIZE 32 + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_REORDER RTE_LOGTYPE_USER1 + +/* A generic circular buffer */ +struct cir_buffer { + unsigned int size; /**< Number of entries that can be stored */ + unsigned int mask; /**< [buffer_size - 1]: used for wrap-around */ + unsigned int head; /**< insertion point in buffer */ + unsigned int tail; /**< extraction point in buffer */ + struct rte_mbuf **entries; +} __rte_cache_aligned; + +/* The reorder buffer data structure itself */ +struct rte_reorder_buffer { + char name[RTE_REORDER_NAMESIZE]; + uint32_t min_seqn; /**< Lowest seq. number that can be in the buffer */ + unsigned int memsize; /**< memory area size of reorder buffer */ + struct cir_buffer ready_buf; /**< temp buffer for dequeued entries */ + struct cir_buffer order_buf; /**< buffer used to reorder entries */ + int is_initialized; +} __rte_cache_aligned; + +static void +rte_reorder_free_mbufs(struct rte_reorder_buffer *b); + +struct rte_reorder_buffer * +rte_reorder_init(struct rte_reorder_buffer *b, unsigned int bufsize, + const char *name, unsigned int size) +{ + const unsigned int min_bufsize = sizeof(*b) + + (2 * size * sizeof(struct rte_mbuf *)); + + if (b == NULL) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer parameter:" + " NULL\n"); + rte_errno = EINVAL; + return NULL; + } + if (!rte_is_power_of_2(size)) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer size" + " - Not a power of 2\n"); + rte_errno = EINVAL; + return NULL; + } + if (name == NULL) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer name ptr:" + " NULL\n"); + rte_errno = EINVAL; + return NULL; + } + if (bufsize < min_bufsize) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer memory size: %u, " + "minimum required: %u\n", bufsize, min_bufsize); + rte_errno = EINVAL; + return NULL; + } + + memset(b, 0, bufsize); + snprintf(b->name, sizeof(b->name), "%s", name); + b->memsize = bufsize; + b->order_buf.size = b->ready_buf.size = size; + b->order_buf.mask = b->ready_buf.mask = size - 1; + b->ready_buf.entries = (void *)&b[1]; + b->order_buf.entries = RTE_PTR_ADD(&b[1], + size * sizeof(b->ready_buf.entries[0])); + + return b; +} + +struct rte_reorder_buffer* +rte_reorder_create(const char *name, unsigned socket_id, unsigned int size) +{ + struct rte_reorder_buffer *b = NULL; + struct rte_tailq_entry *te; + struct rte_reorder_list *reorder_list; + const unsigned int bufsize = sizeof(struct rte_reorder_buffer) + + (2 * size * sizeof(struct rte_mbuf *)); + + reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list); + + /* Check user arguments. */ + if (!rte_is_power_of_2(size)) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer size" + " - Not a power of 2\n"); + rte_errno = EINVAL; + return NULL; + } + if (name == NULL) { + RTE_LOG(ERR, REORDER, "Invalid reorder buffer name ptr:" + " NULL\n"); + rte_errno = EINVAL; + return NULL; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* guarantee there's no existing */ + TAILQ_FOREACH(te, reorder_list, next) { + b = (struct rte_reorder_buffer *) te->data; + if (strncmp(name, b->name, RTE_REORDER_NAMESIZE) == 0) + break; + } + if (te != NULL) + goto exit; + + /* allocate tailq entry */ + te = rte_zmalloc("REORDER_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, REORDER, "Failed to allocate tailq entry\n"); + rte_errno = ENOMEM; + b = NULL; + goto exit; + } + + /* Allocate memory to store the reorder buffer structure. */ + b = rte_zmalloc_socket("REORDER_BUFFER", bufsize, 0, socket_id); + if (b == NULL) { + RTE_LOG(ERR, REORDER, "Memzone allocation failed\n"); + rte_errno = ENOMEM; + rte_free(te); + } else { + rte_reorder_init(b, bufsize, name, size); + te->data = (void *)b; + TAILQ_INSERT_TAIL(reorder_list, te, next); + } + +exit: + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return b; +} + +void +rte_reorder_reset(struct rte_reorder_buffer *b) +{ + char name[RTE_REORDER_NAMESIZE]; + + rte_reorder_free_mbufs(b); + snprintf(name, sizeof(name), "%s", b->name); + /* No error checking as current values should be valid */ + rte_reorder_init(b, b->memsize, name, b->order_buf.size); +} + +static void +rte_reorder_free_mbufs(struct rte_reorder_buffer *b) +{ + unsigned i; + + /* Free up the mbufs of order buffer & ready buffer */ + for (i = 0; i < b->order_buf.size; i++) { + if (b->order_buf.entries[i]) + rte_pktmbuf_free(b->order_buf.entries[i]); + if (b->ready_buf.entries[i]) + rte_pktmbuf_free(b->ready_buf.entries[i]); + } +} + +void +rte_reorder_free(struct rte_reorder_buffer *b) +{ + struct rte_reorder_list *reorder_list; + struct rte_tailq_entry *te; + + /* Check user arguments. */ + if (b == NULL) + return; + + reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find our tailq entry */ + TAILQ_FOREACH(te, reorder_list, next) { + if (te->data == (void *) b) + break; + } + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(reorder_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_reorder_free_mbufs(b); + + rte_free(b); + rte_free(te); +} + +struct rte_reorder_buffer * +rte_reorder_find_existing(const char *name) +{ + struct rte_reorder_buffer *b = NULL; + struct rte_tailq_entry *te; + struct rte_reorder_list *reorder_list; + + reorder_list = RTE_TAILQ_CAST(rte_reorder_tailq.head, rte_reorder_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + TAILQ_FOREACH(te, reorder_list, next) { + b = (struct rte_reorder_buffer *) te->data; + if (strncmp(name, b->name, RTE_REORDER_NAMESIZE) == 0) + break; + } + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return b; +} + +static unsigned +rte_reorder_fill_overflow(struct rte_reorder_buffer *b, unsigned n) +{ + /* + * 1. Move all ready entries that fit to the ready_buf + * 2. check if we meet the minimum needed (n). + * 3. If not, then skip any gaps and keep moving. + * 4. If at any point the ready buffer is full, stop + * 5. Return the number of positions the order_buf head has moved + */ + + struct cir_buffer *order_buf = &b->order_buf, + *ready_buf = &b->ready_buf; + + unsigned int order_head_adv = 0; + + /* + * move at least n packets to ready buffer, assuming ready buffer + * has room for those packets. + */ + while (order_head_adv < n && + ((ready_buf->head + 1) & ready_buf->mask) != ready_buf->tail) { + + /* if we are blocked waiting on a packet, skip it */ + if (order_buf->entries[order_buf->head] == NULL) { + order_buf->head = (order_buf->head + 1) & order_buf->mask; + order_head_adv++; + } + + /* Move all ready entries that fit to the ready_buf */ + while (order_buf->entries[order_buf->head] != NULL) { + ready_buf->entries[ready_buf->head] = + order_buf->entries[order_buf->head]; + + order_buf->entries[order_buf->head] = NULL; + order_head_adv++; + + order_buf->head = (order_buf->head + 1) & order_buf->mask; + + if (((ready_buf->head + 1) & ready_buf->mask) == ready_buf->tail) + break; + + ready_buf->head = (ready_buf->head + 1) & ready_buf->mask; + } + } + + b->min_seqn += order_head_adv; + /* Return the number of positions the order_buf head has moved */ + return order_head_adv; +} + +int +rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf) +{ + uint32_t offset, position; + struct cir_buffer *order_buf = &b->order_buf; + + if (!b->is_initialized) { + b->min_seqn = mbuf->seqn; + b->is_initialized = 1; + } + + /* + * calculate the offset from the head pointer we need to go. + * The subtraction takes care of the sequence number wrapping. + * For example (using 16-bit for brevity): + * min_seqn = 0xFFFD + * mbuf_seqn = 0x0010 + * offset = 0x0010 - 0xFFFD = 0x13 + */ + offset = mbuf->seqn - b->min_seqn; + + /* + * action to take depends on offset. + * offset < buffer->size: the mbuf fits within the current window of + * sequence numbers we can reorder. EXPECTED CASE. + * offset > buffer->size: the mbuf is outside the current window. There + * are a number of cases to consider: + * 1. The packet sequence is just outside the window, then we need + * to see about shifting the head pointer and taking any ready + * to return packets out of the ring. If there was a delayed + * or dropped packet preventing drains from shifting the window + * this case will skip over the dropped packet instead, and any + * packets dequeued here will be returned on the next drain call. + * 2. The packet sequence number is vastly outside our window, taken + * here as having offset greater than twice the buffer size. In + * this case, the packet is probably an old or late packet that + * was previously skipped, so just enqueue the packet for + * immediate return on the next drain call, or else return error. + */ + if (offset < b->order_buf.size) { + position = (order_buf->head + offset) & order_buf->mask; + order_buf->entries[position] = mbuf; + } else if (offset < 2 * b->order_buf.size) { + if (rte_reorder_fill_overflow(b, offset + 1 - order_buf->size) + < (offset + 1 - order_buf->size)) { + /* Put in handling for enqueue straight to output */ + rte_errno = ENOSPC; + return -1; + } + offset = mbuf->seqn - b->min_seqn; + position = (order_buf->head + offset) & order_buf->mask; + order_buf->entries[position] = mbuf; + } else { + /* Put in handling for enqueue straight to output */ + rte_errno = ERANGE; + return -1; + } + return 0; +} + +unsigned int +rte_reorder_drain(struct rte_reorder_buffer *b, struct rte_mbuf **mbufs, + unsigned max_mbufs) +{ + unsigned int drain_cnt = 0; + + struct cir_buffer *order_buf = &b->order_buf, + *ready_buf = &b->ready_buf; + + /* Try to fetch requested number of mbufs from ready buffer */ + while ((drain_cnt < max_mbufs) && (ready_buf->tail != ready_buf->head)) { + mbufs[drain_cnt++] = ready_buf->entries[ready_buf->tail]; + ready_buf->tail = (ready_buf->tail + 1) & ready_buf->mask; + } + + /* + * If requested number of buffers not fetched from ready buffer, fetch + * remaining buffers from order buffer + */ + while ((drain_cnt < max_mbufs) && + (order_buf->entries[order_buf->head] != NULL)) { + mbufs[drain_cnt++] = order_buf->entries[order_buf->head]; + order_buf->entries[order_buf->head] = NULL; + b->min_seqn++; + order_buf->head = (order_buf->head + 1) & order_buf->mask; + } + + return drain_cnt; +} diff --git a/lib/librte_reorder/rte_reorder.h b/lib/librte_reorder/rte_reorder.h new file mode 100644 index 00000000..c7a2934c --- /dev/null +++ b/lib/librte_reorder/rte_reorder.h @@ -0,0 +1,180 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_REORDER_H_ +#define _RTE_REORDER_H_ + +/** + * @file + * RTE reorder + * + * Reorder library is a component which is designed to + * provide ordering of out of ordered packets based on + * sequence number present in mbuf. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +struct rte_reorder_buffer; + +/** + * Create a new reorder buffer instance + * + * Allocate memory and initialize a new reorder buffer in that + * memory, returning the reorder buffer pointer to the user + * + * @param name + * The name to be given to the reorder buffer instance. + * @param socket_id + * The NUMA node on which the memory for the reorder buffer + * instance is to be reserved. + * @param size + * Max number of elements that can be stored in the reorder buffer + * @return + * The initialized reorder buffer instance, or NULL on error + * On error case, rte_errno will be set appropriately: + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +struct rte_reorder_buffer * +rte_reorder_create(const char *name, unsigned socket_id, unsigned int size); + +/** + * Initializes given reorder buffer instance + * + * @param b + * Reorder buffer instance to initialize + * @param bufsize + * Size of the reorder buffer + * @param name + * The name to be given to the reorder buffer + * @param size + * Number of elements that can be stored in reorder buffer + * @return + * The initialized reorder buffer instance, or NULL on error + * On error case, rte_errno will be set appropriately: + * - EINVAL - invalid parameters + */ +struct rte_reorder_buffer * +rte_reorder_init(struct rte_reorder_buffer *b, unsigned int bufsize, + const char *name, unsigned int size); + +/** + * Find an existing reorder buffer instance + * and return a pointer to it. + * + * @param name + * Name of the reorder buffer instacne as passed to rte_reorder_create() + * @return + * Pointer to reorder buffer instance or NULL if object not found with rte_errno + * set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + * reorder instance list + */ +struct rte_reorder_buffer * +rte_reorder_find_existing(const char *name); + +/** + * Reset the given reorder buffer instance with initial values. + * + * @param b + * Reorder buffer instance which has to be reset + */ +void +rte_reorder_reset(struct rte_reorder_buffer *b); + +/** + * Free reorder buffer instance. + * + * @param b + * reorder buffer instance + * @return + * None + */ +void +rte_reorder_free(struct rte_reorder_buffer *b); + +/** + * Insert given mbuf in reorder buffer in its correct position + * + * The given mbuf is to be reordered relative to other mbufs in the system. + * The mbuf must contain a sequence number which is then used to place + * the buffer in the correct position in the reorder buffer. Reordered + * packets can later be taken from the buffer using the rte_reorder_drain() + * API. + * + * @param b + * Reorder buffer where the mbuf has to be inserted. + * @param mbuf + * mbuf of packet that needs to be inserted in reorder buffer. + * @return + * 0 on success + * -1 on error + * On error case, rte_errno will be set appropriately: + * - ENOSPC - Cannot move existing mbufs from reorder buffer to accommodate + * ealry mbuf, but it can be accomodated by performing drain and then insert. + * - ERANGE - Too early or late mbuf which is vastly out of range of expected + * window should be ingnored without any handling. + */ +int +rte_reorder_insert(struct rte_reorder_buffer *b, struct rte_mbuf *mbuf); + +/** + * Fetch reordered buffers + * + * Returns a set of in-order buffers from the reorder buffer structure. Gaps + * may be present in the sequence numbers of the mbuf if packets have been + * delayed too long before reaching the reorder window, or have been previously + * dropped by the system. + * + * @param b + * Reorder buffer instance from which packets are to be drained + * @param mbufs + * array of mbufs where reordered packets will be inserted from reorder buffer + * @param max_mbufs + * the number of elements in the mbufs array. + * @return + * number of mbuf pointers written to mbufs. 0 <= N < max_mbufs. + */ +unsigned int +rte_reorder_drain(struct rte_reorder_buffer *b, struct rte_mbuf **mbufs, + unsigned max_mbufs); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_REORDER_H_ */ diff --git a/lib/librte_reorder/rte_reorder_version.map b/lib/librte_reorder/rte_reorder_version.map new file mode 100644 index 00000000..0a8a54de --- /dev/null +++ b/lib/librte_reorder/rte_reorder_version.map @@ -0,0 +1,13 @@ +DPDK_2.0 { + global: + + rte_reorder_create; + rte_reorder_init; + rte_reorder_find_existing; + rte_reorder_reset; + rte_reorder_free; + rte_reorder_insert; + rte_reorder_drain; + + local: *; +}; diff --git a/lib/librte_ring/Makefile b/lib/librte_ring/Makefile new file mode 100644 index 00000000..4b1112e4 --- /dev/null +++ b/lib/librte_ring/Makefile @@ -0,0 +1,51 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_ring.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_ring_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_RING) := rte_ring.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_RING)-include := rte_ring.h + +DEPDIRS-$(CONFIG_RTE_LIBRTE_RING) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_ring/rte_ring.c b/lib/librte_ring/rte_ring.c new file mode 100644 index 00000000..d80faf3b --- /dev/null +++ b/lib/librte_ring/rte_ring.c @@ -0,0 +1,373 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Derived from FreeBSD's bufring.c + * + ************************************************************************** + * + * Copyright (c) 2007,2008 Kip Macy kmacy@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. The name of Kip Macy nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ***************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_ring.h" + +TAILQ_HEAD(rte_ring_list, rte_tailq_entry); + +static struct rte_tailq_elem rte_ring_tailq = { + .name = RTE_TAILQ_RING_NAME, +}; +EAL_REGISTER_TAILQ(rte_ring_tailq) + +/* true if x is a power of 2 */ +#define POWEROF2(x) ((((x)-1) & (x)) == 0) + +/* return the size of memory occupied by a ring */ +ssize_t +rte_ring_get_memsize(unsigned count) +{ + ssize_t sz; + + /* count must be a power of 2 */ + if ((!POWEROF2(count)) || (count > RTE_RING_SZ_MASK )) { + RTE_LOG(ERR, RING, + "Requested size is invalid, must be power of 2, and " + "do not exceed the size limit %u\n", RTE_RING_SZ_MASK); + return -EINVAL; + } + + sz = sizeof(struct rte_ring) + count * sizeof(void *); + sz = RTE_ALIGN(sz, RTE_CACHE_LINE_SIZE); + return sz; +} + +int +rte_ring_init(struct rte_ring *r, const char *name, unsigned count, + unsigned flags) +{ + /* compilation-time checks */ + RTE_BUILD_BUG_ON((sizeof(struct rte_ring) & + RTE_CACHE_LINE_MASK) != 0); +#ifdef RTE_RING_SPLIT_PROD_CONS + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, cons) & + RTE_CACHE_LINE_MASK) != 0); +#endif + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, prod) & + RTE_CACHE_LINE_MASK) != 0); +#ifdef RTE_LIBRTE_RING_DEBUG + RTE_BUILD_BUG_ON((sizeof(struct rte_ring_debug_stats) & + RTE_CACHE_LINE_MASK) != 0); + RTE_BUILD_BUG_ON((offsetof(struct rte_ring, stats) & + RTE_CACHE_LINE_MASK) != 0); +#endif + + /* init the ring structure */ + memset(r, 0, sizeof(*r)); + snprintf(r->name, sizeof(r->name), "%s", name); + r->flags = flags; + r->prod.watermark = count; + r->prod.sp_enqueue = !!(flags & RING_F_SP_ENQ); + r->cons.sc_dequeue = !!(flags & RING_F_SC_DEQ); + r->prod.size = r->cons.size = count; + r->prod.mask = r->cons.mask = count-1; + r->prod.head = r->cons.head = 0; + r->prod.tail = r->cons.tail = 0; + + return 0; +} + +/* create the ring */ +struct rte_ring * +rte_ring_create(const char *name, unsigned count, int socket_id, + unsigned flags) +{ + char mz_name[RTE_MEMZONE_NAMESIZE]; + struct rte_ring *r; + struct rte_tailq_entry *te; + const struct rte_memzone *mz; + ssize_t ring_size; + int mz_flags = 0; + struct rte_ring_list* ring_list = NULL; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + ring_size = rte_ring_get_memsize(count); + if (ring_size < 0) { + rte_errno = ring_size; + return NULL; + } + + te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, RING, "Cannot reserve memory for tailq\n"); + rte_errno = ENOMEM; + return NULL; + } + + snprintf(mz_name, sizeof(mz_name), "%s%s", RTE_RING_MZ_PREFIX, name); + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* reserve a memory zone for this ring. If we can't get rte_config or + * we are secondary process, the memzone_reserve function will set + * rte_errno for us appropriately - hence no check in this this function */ + mz = rte_memzone_reserve(mz_name, ring_size, socket_id, mz_flags); + if (mz != NULL) { + r = mz->addr; + /* no need to check return value here, we already checked the + * arguments above */ + rte_ring_init(r, name, count, flags); + + te->data = (void *) r; + r->memzone = mz; + + TAILQ_INSERT_TAIL(ring_list, te, next); + } else { + r = NULL; + RTE_LOG(ERR, RING, "Cannot reserve memory\n"); + rte_free(te); + } + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + return r; +} + +/* free the ring */ +void +rte_ring_free(struct rte_ring *r) +{ + struct rte_ring_list *ring_list = NULL; + struct rte_tailq_entry *te; + + if (r == NULL) + return; + + /* + * Ring was not created with rte_ring_create, + * therefore, there is no memzone to free. + */ + if (r->memzone == NULL) { + RTE_LOG(ERR, RING, "Cannot free ring (not created with rte_ring_create()"); + return; + } + + if (rte_memzone_free(r->memzone) != 0) { + RTE_LOG(ERR, RING, "Cannot free memory\n"); + return; + } + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find out tailq entry */ + TAILQ_FOREACH(te, ring_list, next) { + if (te->data == (void *) r) + break; + } + + if (te == NULL) { + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + return; + } + + TAILQ_REMOVE(ring_list, te, next); + + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + + rte_free(te); +} + +/* + * change the high water mark. If *count* is 0, water marking is + * disabled + */ +int +rte_ring_set_water_mark(struct rte_ring *r, unsigned count) +{ + if (count >= r->prod.size) + return -EINVAL; + + /* if count is 0, disable the watermarking */ + if (count == 0) + count = r->prod.size; + + r->prod.watermark = count; + return 0; +} + +/* dump the status of the ring on the console */ +void +rte_ring_dump(FILE *f, const struct rte_ring *r) +{ +#ifdef RTE_LIBRTE_RING_DEBUG + struct rte_ring_debug_stats sum; + unsigned lcore_id; +#endif + + fprintf(f, "ring <%s>@%p\n", r->name, r); + fprintf(f, " flags=%x\n", r->flags); + fprintf(f, " size=%"PRIu32"\n", r->prod.size); + fprintf(f, " ct=%"PRIu32"\n", r->cons.tail); + fprintf(f, " ch=%"PRIu32"\n", r->cons.head); + fprintf(f, " pt=%"PRIu32"\n", r->prod.tail); + fprintf(f, " ph=%"PRIu32"\n", r->prod.head); + fprintf(f, " used=%u\n", rte_ring_count(r)); + fprintf(f, " avail=%u\n", rte_ring_free_count(r)); + if (r->prod.watermark == r->prod.size) + fprintf(f, " watermark=0\n"); + else + fprintf(f, " watermark=%"PRIu32"\n", r->prod.watermark); + + /* sum and dump statistics */ +#ifdef RTE_LIBRTE_RING_DEBUG + memset(&sum, 0, sizeof(sum)); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.enq_success_bulk += r->stats[lcore_id].enq_success_bulk; + sum.enq_success_objs += r->stats[lcore_id].enq_success_objs; + sum.enq_quota_bulk += r->stats[lcore_id].enq_quota_bulk; + sum.enq_quota_objs += r->stats[lcore_id].enq_quota_objs; + sum.enq_fail_bulk += r->stats[lcore_id].enq_fail_bulk; + sum.enq_fail_objs += r->stats[lcore_id].enq_fail_objs; + sum.deq_success_bulk += r->stats[lcore_id].deq_success_bulk; + sum.deq_success_objs += r->stats[lcore_id].deq_success_objs; + sum.deq_fail_bulk += r->stats[lcore_id].deq_fail_bulk; + sum.deq_fail_objs += r->stats[lcore_id].deq_fail_objs; + } + fprintf(f, " size=%"PRIu32"\n", r->prod.size); + fprintf(f, " enq_success_bulk=%"PRIu64"\n", sum.enq_success_bulk); + fprintf(f, " enq_success_objs=%"PRIu64"\n", sum.enq_success_objs); + fprintf(f, " enq_quota_bulk=%"PRIu64"\n", sum.enq_quota_bulk); + fprintf(f, " enq_quota_objs=%"PRIu64"\n", sum.enq_quota_objs); + fprintf(f, " enq_fail_bulk=%"PRIu64"\n", sum.enq_fail_bulk); + fprintf(f, " enq_fail_objs=%"PRIu64"\n", sum.enq_fail_objs); + fprintf(f, " deq_success_bulk=%"PRIu64"\n", sum.deq_success_bulk); + fprintf(f, " deq_success_objs=%"PRIu64"\n", sum.deq_success_objs); + fprintf(f, " deq_fail_bulk=%"PRIu64"\n", sum.deq_fail_bulk); + fprintf(f, " deq_fail_objs=%"PRIu64"\n", sum.deq_fail_objs); +#else + fprintf(f, " no statistics available\n"); +#endif +} + +/* dump the status of all rings on the console */ +void +rte_ring_list_dump(FILE *f) +{ + const struct rte_tailq_entry *te; + struct rte_ring_list *ring_list; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, ring_list, next) { + rte_ring_dump(f, (struct rte_ring *) te->data); + } + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); +} + +/* search a ring from its name */ +struct rte_ring * +rte_ring_lookup(const char *name) +{ + struct rte_tailq_entry *te; + struct rte_ring *r = NULL; + struct rte_ring_list *ring_list; + + ring_list = RTE_TAILQ_CAST(rte_ring_tailq.head, rte_ring_list); + + rte_rwlock_read_lock(RTE_EAL_TAILQ_RWLOCK); + + TAILQ_FOREACH(te, ring_list, next) { + r = (struct rte_ring *) te->data; + if (strncmp(name, r->name, RTE_RING_NAMESIZE) == 0) + break; + } + + rte_rwlock_read_unlock(RTE_EAL_TAILQ_RWLOCK); + + if (te == NULL) { + rte_errno = ENOENT; + return NULL; + } + + return r; +} diff --git a/lib/librte_ring/rte_ring.h b/lib/librte_ring/rte_ring.h new file mode 100644 index 00000000..eb45e414 --- /dev/null +++ b/lib/librte_ring/rte_ring.h @@ -0,0 +1,1261 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Derived from FreeBSD's bufring.h + * + ************************************************************************** + * + * Copyright (c) 2007-2009 Kip Macy kmacy@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. The name of Kip Macy nor the names of other + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + * + ***************************************************************************/ + +#ifndef _RTE_RING_H_ +#define _RTE_RING_H_ + +/** + * @file + * RTE Ring + * + * The Ring Manager is a fixed-size queue, implemented as a table of + * pointers. Head and tail pointers are modified atomically, allowing + * concurrent access to it. It has the following features: + * + * - FIFO (First In First Out) + * - Maximum size is fixed; the pointers are stored in a table. + * - Lockless implementation. + * - Multi- or single-consumer dequeue. + * - Multi- or single-producer enqueue. + * - Bulk dequeue. + * - Bulk enqueue. + * + * Note: the ring implementation is not preemptable. A lcore must not + * be interrupted by another task that uses the same ring. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define RTE_TAILQ_RING_NAME "RTE_RING" + +enum rte_ring_queue_behavior { + RTE_RING_QUEUE_FIXED = 0, /* Enq/Deq a fixed number of items from a ring */ + RTE_RING_QUEUE_VARIABLE /* Enq/Deq as many items a possible from ring */ +}; + +#ifdef RTE_LIBRTE_RING_DEBUG +/** + * A structure that stores the ring statistics (per-lcore). + */ +struct rte_ring_debug_stats { + uint64_t enq_success_bulk; /**< Successful enqueues number. */ + uint64_t enq_success_objs; /**< Objects successfully enqueued. */ + uint64_t enq_quota_bulk; /**< Successful enqueues above watermark. */ + uint64_t enq_quota_objs; /**< Objects enqueued above watermark. */ + uint64_t enq_fail_bulk; /**< Failed enqueues number. */ + uint64_t enq_fail_objs; /**< Objects that failed to be enqueued. */ + uint64_t deq_success_bulk; /**< Successful dequeues number. */ + uint64_t deq_success_objs; /**< Objects successfully dequeued. */ + uint64_t deq_fail_bulk; /**< Failed dequeues number. */ + uint64_t deq_fail_objs; /**< Objects that failed to be dequeued. */ +} __rte_cache_aligned; +#endif + +#define RTE_RING_NAMESIZE 32 /**< The maximum length of a ring name. */ +#define RTE_RING_MZ_PREFIX "RG_" + +#ifndef RTE_RING_PAUSE_REP_COUNT +#define RTE_RING_PAUSE_REP_COUNT 0 /**< Yield after pause num of times, no yield + * if RTE_RING_PAUSE_REP not defined. */ +#endif + +struct rte_memzone; /* forward declaration, so as not to require memzone.h */ + +/** + * An RTE ring structure. + * + * The producer and the consumer have a head and a tail index. The particularity + * of these index is that they are not between 0 and size(ring). These indexes + * are between 0 and 2^32, and we mask their value when we access the ring[] + * field. Thanks to this assumption, we can do subtractions between 2 index + * values in a modulo-32bit base: that's why the overflow of the indexes is not + * a problem. + */ +struct rte_ring { + char name[RTE_RING_NAMESIZE]; /**< Name of the ring. */ + int flags; /**< Flags supplied at creation. */ + const struct rte_memzone *memzone; + /**< Memzone, if any, containing the rte_ring */ + + /** Ring producer status. */ + struct prod { + uint32_t watermark; /**< Maximum items before EDQUOT. */ + uint32_t sp_enqueue; /**< True, if single producer. */ + uint32_t size; /**< Size of ring. */ + uint32_t mask; /**< Mask (size-1) of ring. */ + volatile uint32_t head; /**< Producer head. */ + volatile uint32_t tail; /**< Producer tail. */ + } prod __rte_cache_aligned; + + /** Ring consumer status. */ + struct cons { + uint32_t sc_dequeue; /**< True, if single consumer. */ + uint32_t size; /**< Size of the ring. */ + uint32_t mask; /**< Mask (size-1) of ring. */ + volatile uint32_t head; /**< Consumer head. */ + volatile uint32_t tail; /**< Consumer tail. */ +#ifdef RTE_RING_SPLIT_PROD_CONS + } cons __rte_cache_aligned; +#else + } cons; +#endif + +#ifdef RTE_LIBRTE_RING_DEBUG + struct rte_ring_debug_stats stats[RTE_MAX_LCORE]; +#endif + + void * ring[0] __rte_cache_aligned; /**< Memory space of ring starts here. + * not volatile so need to be careful + * about compiler re-ordering */ +}; + +#define RING_F_SP_ENQ 0x0001 /**< The default enqueue is "single-producer". */ +#define RING_F_SC_DEQ 0x0002 /**< The default dequeue is "single-consumer". */ +#define RTE_RING_QUOT_EXCEED (1 << 31) /**< Quota exceed for burst ops */ +#define RTE_RING_SZ_MASK (unsigned)(0x0fffffff) /**< Ring size mask */ + +/** + * @internal When debug is enabled, store ring statistics. + * @param r + * A pointer to the ring. + * @param name + * The name of the statistics field to increment in the ring. + * @param n + * The number to add to the object-oriented statistics. + */ +#ifdef RTE_LIBRTE_RING_DEBUG +#define __RING_STAT_ADD(r, name, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) { \ + r->stats[__lcore_id].name##_objs += n; \ + r->stats[__lcore_id].name##_bulk += 1; \ + } \ + } while(0) +#else +#define __RING_STAT_ADD(r, name, n) do {} while(0) +#endif + +/** + * Calculate the memory size needed for a ring + * + * This function returns the number of bytes needed for a ring, given + * the number of elements in it. This value is the sum of the size of + * the structure rte_ring and the size of the memory needed by the + * objects pointers. The value is aligned to a cache line size. + * + * @param count + * The number of elements in the ring (must be a power of 2). + * @return + * - The memory size needed for the ring on success. + * - -EINVAL if count is not a power of 2. + */ +ssize_t rte_ring_get_memsize(unsigned count); + +/** + * Initialize a ring structure. + * + * Initialize a ring structure in memory pointed by "r". The size of the + * memory area must be large enough to store the ring structure and the + * object table. It is advised to use rte_ring_get_memsize() to get the + * appropriate size. + * + * The ring size is set to *count*, which must be a power of two. Water + * marking is disabled by default. The real usable ring size is + * *count-1* instead of *count* to differentiate a free ring from an + * empty ring. + * + * The ring is not added in RTE_TAILQ_RING global list. Indeed, the + * memory given by the caller may not be shareable among dpdk + * processes. + * + * @param r + * The pointer to the ring structure followed by the objects table. + * @param name + * The name of the ring. + * @param count + * The number of elements in the ring (must be a power of 2). + * @param flags + * An OR of the following: + * - RING_F_SP_ENQ: If this flag is set, the default behavior when + * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()`` + * is "single-producer". Otherwise, it is "multi-producers". + * - RING_F_SC_DEQ: If this flag is set, the default behavior when + * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()`` + * is "single-consumer". Otherwise, it is "multi-consumers". + * @return + * 0 on success, or a negative value on error. + */ +int rte_ring_init(struct rte_ring *r, const char *name, unsigned count, + unsigned flags); + +/** + * Create a new ring named *name* in memory. + * + * This function uses ``memzone_reserve()`` to allocate memory. Then it + * calls rte_ring_init() to initialize an empty ring. + * + * The new ring size is set to *count*, which must be a power of + * two. Water marking is disabled by default. The real usable ring size + * is *count-1* instead of *count* to differentiate a free ring from an + * empty ring. + * + * The ring is added in RTE_TAILQ_RING list. + * + * @param name + * The name of the ring. + * @param count + * The size of the ring (must be a power of 2). + * @param socket_id + * The *socket_id* argument is the socket identifier in case of + * NUMA. The value can be *SOCKET_ID_ANY* if there is no NUMA + * constraint for the reserved zone. + * @param flags + * An OR of the following: + * - RING_F_SP_ENQ: If this flag is set, the default behavior when + * using ``rte_ring_enqueue()`` or ``rte_ring_enqueue_bulk()`` + * is "single-producer". Otherwise, it is "multi-producers". + * - RING_F_SC_DEQ: If this flag is set, the default behavior when + * using ``rte_ring_dequeue()`` or ``rte_ring_dequeue_bulk()`` + * is "single-consumer". Otherwise, it is "multi-consumers". + * @return + * On success, the pointer to the new allocated ring. NULL on error with + * rte_errno set appropriately. Possible errno values include: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - EINVAL - count provided is not a power of 2 + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + */ +struct rte_ring *rte_ring_create(const char *name, unsigned count, + int socket_id, unsigned flags); +/** + * De-allocate all memory used by the ring. + * + * @param r + * Ring to free + */ +void rte_ring_free(struct rte_ring *r); + +/** + * Change the high water mark. + * + * If *count* is 0, water marking is disabled. Otherwise, it is set to the + * *count* value. The *count* value must be greater than 0 and less + * than the ring size. + * + * This function can be called at any time (not necessarily at + * initialization). + * + * @param r + * A pointer to the ring structure. + * @param count + * The new water mark value. + * @return + * - 0: Success; water mark changed. + * - -EINVAL: Invalid water mark value. + */ +int rte_ring_set_water_mark(struct rte_ring *r, unsigned count); + +/** + * Dump the status of the ring to the console. + * + * @param f + * A pointer to a file for output + * @param r + * A pointer to the ring structure. + */ +void rte_ring_dump(FILE *f, const struct rte_ring *r); + +/* the actual enqueue of pointers on the ring. + * Placed here since identical code needed in both + * single and multi producer enqueue functions */ +#define ENQUEUE_PTRS() do { \ + const uint32_t size = r->prod.size; \ + uint32_t idx = prod_head & mask; \ + if (likely(idx + n < size)) { \ + for (i = 0; i < (n & ((~(unsigned)0x3))); i+=4, idx+=4) { \ + r->ring[idx] = obj_table[i]; \ + r->ring[idx+1] = obj_table[i+1]; \ + r->ring[idx+2] = obj_table[i+2]; \ + r->ring[idx+3] = obj_table[i+3]; \ + } \ + switch (n & 0x3) { \ + case 3: r->ring[idx++] = obj_table[i++]; \ + case 2: r->ring[idx++] = obj_table[i++]; \ + case 1: r->ring[idx++] = obj_table[i++]; \ + } \ + } else { \ + for (i = 0; idx < size; i++, idx++)\ + r->ring[idx] = obj_table[i]; \ + for (idx = 0; i < n; i++, idx++) \ + r->ring[idx] = obj_table[i]; \ + } \ +} while(0) + +/* the actual copy of pointers on the ring to obj_table. + * Placed here since identical code needed in both + * single and multi consumer dequeue functions */ +#define DEQUEUE_PTRS() do { \ + uint32_t idx = cons_head & mask; \ + const uint32_t size = r->cons.size; \ + if (likely(idx + n < size)) { \ + for (i = 0; i < (n & (~(unsigned)0x3)); i+=4, idx+=4) {\ + obj_table[i] = r->ring[idx]; \ + obj_table[i+1] = r->ring[idx+1]; \ + obj_table[i+2] = r->ring[idx+2]; \ + obj_table[i+3] = r->ring[idx+3]; \ + } \ + switch (n & 0x3) { \ + case 3: obj_table[i++] = r->ring[idx++]; \ + case 2: obj_table[i++] = r->ring[idx++]; \ + case 1: obj_table[i++] = r->ring[idx++]; \ + } \ + } else { \ + for (i = 0; idx < size; i++, idx++) \ + obj_table[i] = r->ring[idx]; \ + for (idx = 0; i < n; i++, idx++) \ + obj_table[i] = r->ring[idx]; \ + } \ +} while (0) + +/** + * @internal Enqueue several objects on the ring (multi-producers safe). + * + * This function uses a "compare and set" instruction to move the + * producer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @param behavior + * RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items from a ring + * RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring + * @return + * Depend on the behavior value + * if behavior = RTE_RING_QUEUE_FIXED + * - 0: Success; objects enqueue. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. + * if behavior = RTE_RING_QUEUE_VARIABLE + * - n: Actual number of objects enqueued. + */ +static inline int __attribute__((always_inline)) +__rte_ring_mp_do_enqueue(struct rte_ring *r, void * const *obj_table, + unsigned n, enum rte_ring_queue_behavior behavior) +{ + uint32_t prod_head, prod_next; + uint32_t cons_tail, free_entries; + const unsigned max = n; + int success; + unsigned i, rep = 0; + uint32_t mask = r->prod.mask; + int ret; + + /* Avoid the unnecessary cmpset operation below, which is also + * potentially harmful when n equals 0. */ + if (n == 0) + return 0; + + /* move prod.head atomically */ + do { + /* Reset n to the initial burst count */ + n = max; + + prod_head = r->prod.head; + cons_tail = r->cons.tail; + /* The subtraction is done between two unsigned 32bits value + * (the result is always modulo 32 bits even if we have + * prod_head > cons_tail). So 'free_entries' is always between 0 + * and size(ring)-1. */ + free_entries = (mask + cons_tail - prod_head); + + /* check that we have enough room in ring */ + if (unlikely(n > free_entries)) { + if (behavior == RTE_RING_QUEUE_FIXED) { + __RING_STAT_ADD(r, enq_fail, n); + return -ENOBUFS; + } + else { + /* No free entry available */ + if (unlikely(free_entries == 0)) { + __RING_STAT_ADD(r, enq_fail, n); + return 0; + } + + n = free_entries; + } + } + + prod_next = prod_head + n; + success = rte_atomic32_cmpset(&r->prod.head, prod_head, + prod_next); + } while (unlikely(success == 0)); + + /* write entries in ring */ + ENQUEUE_PTRS(); + rte_smp_wmb(); + + /* if we exceed the watermark */ + if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { + ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT : + (int)(n | RTE_RING_QUOT_EXCEED); + __RING_STAT_ADD(r, enq_quota, n); + } + else { + ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n; + __RING_STAT_ADD(r, enq_success, n); + } + + /* + * If there are other enqueues in progress that preceded us, + * we need to wait for them to complete + */ + while (unlikely(r->prod.tail != prod_head)) { + rte_pause(); + + /* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting + * for other thread finish. It gives pre-empted thread a chance + * to proceed and finish with ring dequeue operation. */ + if (RTE_RING_PAUSE_REP_COUNT && + ++rep == RTE_RING_PAUSE_REP_COUNT) { + rep = 0; + sched_yield(); + } + } + r->prod.tail = prod_next; + return ret; +} + +/** + * @internal Enqueue several objects on a ring (NOT multi-producers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @param behavior + * RTE_RING_QUEUE_FIXED: Enqueue a fixed number of items from a ring + * RTE_RING_QUEUE_VARIABLE: Enqueue as many items a possible from ring + * @return + * Depend on the behavior value + * if behavior = RTE_RING_QUEUE_FIXED + * - 0: Success; objects enqueue. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. + * if behavior = RTE_RING_QUEUE_VARIABLE + * - n: Actual number of objects enqueued. + */ +static inline int __attribute__((always_inline)) +__rte_ring_sp_do_enqueue(struct rte_ring *r, void * const *obj_table, + unsigned n, enum rte_ring_queue_behavior behavior) +{ + uint32_t prod_head, cons_tail; + uint32_t prod_next, free_entries; + unsigned i; + uint32_t mask = r->prod.mask; + int ret; + + prod_head = r->prod.head; + cons_tail = r->cons.tail; + /* The subtraction is done between two unsigned 32bits value + * (the result is always modulo 32 bits even if we have + * prod_head > cons_tail). So 'free_entries' is always between 0 + * and size(ring)-1. */ + free_entries = mask + cons_tail - prod_head; + + /* check that we have enough room in ring */ + if (unlikely(n > free_entries)) { + if (behavior == RTE_RING_QUEUE_FIXED) { + __RING_STAT_ADD(r, enq_fail, n); + return -ENOBUFS; + } + else { + /* No free entry available */ + if (unlikely(free_entries == 0)) { + __RING_STAT_ADD(r, enq_fail, n); + return 0; + } + + n = free_entries; + } + } + + prod_next = prod_head + n; + r->prod.head = prod_next; + + /* write entries in ring */ + ENQUEUE_PTRS(); + rte_smp_wmb(); + + /* if we exceed the watermark */ + if (unlikely(((mask + 1) - free_entries + n) > r->prod.watermark)) { + ret = (behavior == RTE_RING_QUEUE_FIXED) ? -EDQUOT : + (int)(n | RTE_RING_QUOT_EXCEED); + __RING_STAT_ADD(r, enq_quota, n); + } + else { + ret = (behavior == RTE_RING_QUEUE_FIXED) ? 0 : n; + __RING_STAT_ADD(r, enq_success, n); + } + + r->prod.tail = prod_next; + return ret; +} + +/** + * @internal Dequeue several objects from a ring (multi-consumers safe). When + * the request objects are more than the available objects, only dequeue the + * actual number of objects + * + * This function uses a "compare and set" instruction to move the + * consumer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @param behavior + * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring + * RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring + * @return + * Depend on the behavior value + * if behavior = RTE_RING_QUEUE_FIXED + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue; no object is + * dequeued. + * if behavior = RTE_RING_QUEUE_VARIABLE + * - n: Actual number of objects dequeued. + */ + +static inline int __attribute__((always_inline)) +__rte_ring_mc_do_dequeue(struct rte_ring *r, void **obj_table, + unsigned n, enum rte_ring_queue_behavior behavior) +{ + uint32_t cons_head, prod_tail; + uint32_t cons_next, entries; + const unsigned max = n; + int success; + unsigned i, rep = 0; + uint32_t mask = r->prod.mask; + + /* Avoid the unnecessary cmpset operation below, which is also + * potentially harmful when n equals 0. */ + if (n == 0) + return 0; + + /* move cons.head atomically */ + do { + /* Restore n as it may change every loop */ + n = max; + + cons_head = r->cons.head; + prod_tail = r->prod.tail; + /* The subtraction is done between two unsigned 32bits value + * (the result is always modulo 32 bits even if we have + * cons_head > prod_tail). So 'entries' is always between 0 + * and size(ring)-1. */ + entries = (prod_tail - cons_head); + + /* Set the actual entries for dequeue */ + if (n > entries) { + if (behavior == RTE_RING_QUEUE_FIXED) { + __RING_STAT_ADD(r, deq_fail, n); + return -ENOENT; + } + else { + if (unlikely(entries == 0)){ + __RING_STAT_ADD(r, deq_fail, n); + return 0; + } + + n = entries; + } + } + + cons_next = cons_head + n; + success = rte_atomic32_cmpset(&r->cons.head, cons_head, + cons_next); + } while (unlikely(success == 0)); + + /* copy in table */ + DEQUEUE_PTRS(); + rte_smp_rmb(); + + /* + * If there are other dequeues in progress that preceded us, + * we need to wait for them to complete + */ + while (unlikely(r->cons.tail != cons_head)) { + rte_pause(); + + /* Set RTE_RING_PAUSE_REP_COUNT to avoid spin too long waiting + * for other thread finish. It gives pre-empted thread a chance + * to proceed and finish with ring dequeue operation. */ + if (RTE_RING_PAUSE_REP_COUNT && + ++rep == RTE_RING_PAUSE_REP_COUNT) { + rep = 0; + sched_yield(); + } + } + __RING_STAT_ADD(r, deq_success, n); + r->cons.tail = cons_next; + + return behavior == RTE_RING_QUEUE_FIXED ? 0 : n; +} + +/** + * @internal Dequeue several objects from a ring (NOT multi-consumers safe). + * When the request objects are more than the available objects, only dequeue + * the actual number of objects + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @param behavior + * RTE_RING_QUEUE_FIXED: Dequeue a fixed number of items from a ring + * RTE_RING_QUEUE_VARIABLE: Dequeue as many items a possible from ring + * @return + * Depend on the behavior value + * if behavior = RTE_RING_QUEUE_FIXED + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue; no object is + * dequeued. + * if behavior = RTE_RING_QUEUE_VARIABLE + * - n: Actual number of objects dequeued. + */ +static inline int __attribute__((always_inline)) +__rte_ring_sc_do_dequeue(struct rte_ring *r, void **obj_table, + unsigned n, enum rte_ring_queue_behavior behavior) +{ + uint32_t cons_head, prod_tail; + uint32_t cons_next, entries; + unsigned i; + uint32_t mask = r->prod.mask; + + cons_head = r->cons.head; + prod_tail = r->prod.tail; + /* The subtraction is done between two unsigned 32bits value + * (the result is always modulo 32 bits even if we have + * cons_head > prod_tail). So 'entries' is always between 0 + * and size(ring)-1. */ + entries = prod_tail - cons_head; + + if (n > entries) { + if (behavior == RTE_RING_QUEUE_FIXED) { + __RING_STAT_ADD(r, deq_fail, n); + return -ENOENT; + } + else { + if (unlikely(entries == 0)){ + __RING_STAT_ADD(r, deq_fail, n); + return 0; + } + + n = entries; + } + } + + cons_next = cons_head + n; + r->cons.head = cons_next; + + /* copy in table */ + DEQUEUE_PTRS(); + rte_smp_rmb(); + + __RING_STAT_ADD(r, deq_success, n); + r->cons.tail = cons_next; + return behavior == RTE_RING_QUEUE_FIXED ? 0 : n; +} + +/** + * Enqueue several objects on the ring (multi-producers safe). + * + * This function uses a "compare and set" instruction to move the + * producer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - 0: Success; objects enqueue. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue, no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_mp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED); +} + +/** + * Enqueue several objects on a ring (NOT multi-producers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - 0: Success; objects enqueued. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_sp_enqueue_bulk(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_FIXED); +} + +/** + * Enqueue several objects on a ring. + * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - 0: Success; objects enqueued. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_enqueue_bulk(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + if (r->prod.sp_enqueue) + return rte_ring_sp_enqueue_bulk(r, obj_table, n); + else + return rte_ring_mp_enqueue_bulk(r, obj_table, n); +} + +/** + * Enqueue one object on a ring (multi-producers safe). + * + * This function uses a "compare and set" instruction to move the + * producer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj + * A pointer to the object to be added. + * @return + * - 0: Success; objects enqueued. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_mp_enqueue(struct rte_ring *r, void *obj) +{ + return rte_ring_mp_enqueue_bulk(r, &obj, 1); +} + +/** + * Enqueue one object on a ring (NOT multi-producers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj + * A pointer to the object to be added. + * @return + * - 0: Success; objects enqueued. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_sp_enqueue(struct rte_ring *r, void *obj) +{ + return rte_ring_sp_enqueue_bulk(r, &obj, 1); +} + +/** + * Enqueue one object on a ring. + * + * This function calls the multi-producer or the single-producer + * version, depending on the default behaviour that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj + * A pointer to the object to be added. + * @return + * - 0: Success; objects enqueued. + * - -EDQUOT: Quota exceeded. The objects have been enqueued, but the + * high water mark is exceeded. + * - -ENOBUFS: Not enough room in the ring to enqueue; no object is enqueued. + */ +static inline int __attribute__((always_inline)) +rte_ring_enqueue(struct rte_ring *r, void *obj) +{ + if (r->prod.sp_enqueue) + return rte_ring_sp_enqueue(r, obj); + else + return rte_ring_mp_enqueue(r, obj); +} + +/** + * Dequeue several objects from a ring (multi-consumers safe). + * + * This function uses a "compare and set" instruction to move the + * consumer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue; no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_mc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED); +} + +/** + * Dequeue several objects from a ring (NOT multi-consumers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table, + * must be strictly positive. + * @return + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue; no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_sc_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_FIXED); +} + +/** + * Dequeue several objects from a ring. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behaviour that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue, no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_dequeue_bulk(struct rte_ring *r, void **obj_table, unsigned n) +{ + if (r->cons.sc_dequeue) + return rte_ring_sc_dequeue_bulk(r, obj_table, n); + else + return rte_ring_mc_dequeue_bulk(r, obj_table, n); +} + +/** + * Dequeue one object from a ring (multi-consumers safe). + * + * This function uses a "compare and set" instruction to move the + * consumer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue; no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_mc_dequeue(struct rte_ring *r, void **obj_p) +{ + return rte_ring_mc_dequeue_bulk(r, obj_p, 1); +} + +/** + * Dequeue one object from a ring (NOT multi-consumers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success; objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue, no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_sc_dequeue(struct rte_ring *r, void **obj_p) +{ + return rte_ring_sc_dequeue_bulk(r, obj_p, 1); +} + +/** + * Dequeue one object from a ring. + * + * This function calls the multi-consumers or the single-consumer + * version depending on the default behaviour that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_p + * A pointer to a void * pointer (object) that will be filled. + * @return + * - 0: Success, objects dequeued. + * - -ENOENT: Not enough entries in the ring to dequeue, no object is + * dequeued. + */ +static inline int __attribute__((always_inline)) +rte_ring_dequeue(struct rte_ring *r, void **obj_p) +{ + if (r->cons.sc_dequeue) + return rte_ring_sc_dequeue(r, obj_p); + else + return rte_ring_mc_dequeue(r, obj_p); +} + +/** + * Test if a ring is full. + * + * @param r + * A pointer to the ring structure. + * @return + * - 1: The ring is full. + * - 0: The ring is not full. + */ +static inline int +rte_ring_full(const struct rte_ring *r) +{ + uint32_t prod_tail = r->prod.tail; + uint32_t cons_tail = r->cons.tail; + return ((cons_tail - prod_tail - 1) & r->prod.mask) == 0; +} + +/** + * Test if a ring is empty. + * + * @param r + * A pointer to the ring structure. + * @return + * - 1: The ring is empty. + * - 0: The ring is not empty. + */ +static inline int +rte_ring_empty(const struct rte_ring *r) +{ + uint32_t prod_tail = r->prod.tail; + uint32_t cons_tail = r->cons.tail; + return !!(cons_tail == prod_tail); +} + +/** + * Return the number of entries in a ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The number of entries in the ring. + */ +static inline unsigned +rte_ring_count(const struct rte_ring *r) +{ + uint32_t prod_tail = r->prod.tail; + uint32_t cons_tail = r->cons.tail; + return (prod_tail - cons_tail) & r->prod.mask; +} + +/** + * Return the number of free entries in a ring. + * + * @param r + * A pointer to the ring structure. + * @return + * The number of free entries in the ring. + */ +static inline unsigned +rte_ring_free_count(const struct rte_ring *r) +{ + uint32_t prod_tail = r->prod.tail; + uint32_t cons_tail = r->cons.tail; + return (cons_tail - prod_tail - 1) & r->prod.mask; +} + +/** + * Dump the status of all rings on the console + * + * @param f + * A pointer to a file for output + */ +void rte_ring_list_dump(FILE *f); + +/** + * Search a ring from its name + * + * @param name + * The name of the ring. + * @return + * The pointer to the ring matching the name, or NULL if not found, + * with rte_errno set appropriately. Possible rte_errno values include: + * - ENOENT - required entry not available to return. + */ +struct rte_ring *rte_ring_lookup(const char *name); + +/** + * Enqueue several objects on the ring (multi-producers safe). + * + * This function uses a "compare and set" instruction to move the + * producer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - n: Actual number of objects enqueued. + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_mp_enqueue_burst(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + return __rte_ring_mp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Enqueue several objects on a ring (NOT multi-producers safe). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - n: Actual number of objects enqueued. + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_sp_enqueue_burst(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + return __rte_ring_sp_do_enqueue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Enqueue several objects on a ring. + * + * This function calls the multi-producer or the single-producer + * version depending on the default behavior that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects). + * @param n + * The number of objects to add in the ring from the obj_table. + * @return + * - n: Actual number of objects enqueued. + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_enqueue_burst(struct rte_ring *r, void * const *obj_table, + unsigned n) +{ + if (r->prod.sp_enqueue) + return rte_ring_sp_enqueue_burst(r, obj_table, n); + else + return rte_ring_mp_enqueue_burst(r, obj_table, n); +} + +/** + * Dequeue several objects from a ring (multi-consumers safe). When the request + * objects are more than the available objects, only dequeue the actual number + * of objects + * + * This function uses a "compare and set" instruction to move the + * consumer index atomically. + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - n: Actual number of objects dequeued, 0 if ring is empty + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_mc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_mc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Dequeue several objects from a ring (NOT multi-consumers safe).When the + * request objects are more than the available objects, only dequeue the + * actual number of objects + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - n: Actual number of objects dequeued, 0 if ring is empty + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_sc_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + return __rte_ring_sc_do_dequeue(r, obj_table, n, RTE_RING_QUEUE_VARIABLE); +} + +/** + * Dequeue multiple objects from a ring up to a maximum number. + * + * This function calls the multi-consumers or the single-consumer + * version, depending on the default behaviour that was specified at + * ring creation time (see flags). + * + * @param r + * A pointer to the ring structure. + * @param obj_table + * A pointer to a table of void * pointers (objects) that will be filled. + * @param n + * The number of objects to dequeue from the ring to the obj_table. + * @return + * - Number of objects dequeued + */ +static inline unsigned __attribute__((always_inline)) +rte_ring_dequeue_burst(struct rte_ring *r, void **obj_table, unsigned n) +{ + if (r->cons.sc_dequeue) + return rte_ring_sc_dequeue_burst(r, obj_table, n); + else + return rte_ring_mc_dequeue_burst(r, obj_table, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RING_H_ */ diff --git a/lib/librte_ring/rte_ring_version.map b/lib/librte_ring/rte_ring_version.map new file mode 100644 index 00000000..5474b985 --- /dev/null +++ b/lib/librte_ring/rte_ring_version.map @@ -0,0 +1,20 @@ +DPDK_2.0 { + global: + + rte_ring_create; + rte_ring_dump; + rte_ring_get_memsize; + rte_ring_init; + rte_ring_list_dump; + rte_ring_lookup; + rte_ring_set_water_mark; + + local: *; +}; + +DPDK_2.2 { + global: + + rte_ring_free; + +} DPDK_2.0; diff --git a/lib/librte_sched/Makefile b/lib/librte_sched/Makefile new file mode 100644 index 00000000..a782cd1a --- /dev/null +++ b/lib/librte_sched/Makefile @@ -0,0 +1,65 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_sched.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +CFLAGS_rte_red.o := -D_GNU_SOURCE + +LDLIBS += -lm +LDLIBS += -lrt + +EXPORT_MAP := rte_sched_version.map + +LIBABIVER := 1 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_sched.c rte_red.c rte_approx.c +SRCS-$(CONFIG_RTE_LIBRTE_SCHED) += rte_reciprocal.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include := rte_sched.h rte_bitmap.h rte_sched_common.h rte_red.h rte_approx.h +SYMLINK-$(CONFIG_RTE_LIBRTE_SCHED)-include += rte_reciprocal.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_mempool lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_SCHED) += lib/librte_net lib/librte_timer + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_sched/rte_approx.c b/lib/librte_sched/rte_approx.c new file mode 100644 index 00000000..739f37d7 --- /dev/null +++ b/lib/librte_sched/rte_approx.c @@ -0,0 +1,196 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include "rte_approx.h" + +/* + * Based on paper "Approximating Rational Numbers by Fractions" by Michal + * Forisek forisek@dcs.fmph.uniba.sk + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. + * + * http://people.ksp.sk/~misof/publications/2007approx.pdf + */ + +/* fraction comparison: compare (a/b) and (c/d) */ +static inline uint32_t +less(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return a*d < b*c; +} + +static inline uint32_t +less_or_equal(uint32_t a, uint32_t b, uint32_t c, uint32_t d) +{ + return a*d <= b*c; +} + +/* check whether a/b is a valid approximation */ +static inline uint32_t +matches(uint32_t a, uint32_t b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum) +{ + if (less_or_equal(a, b, alpha_num - d_num, denum)) + return 0; + + if (less(a ,b, alpha_num + d_num, denum)) + return 1; + + return 0; +} + +static inline void +find_exact_solution_left(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = denum * p_b - (alpha_num + d_num) * q_b; + uint32_t k_denum = (alpha_num + d_num) * q_a - denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static inline void +find_exact_solution_right(uint32_t p_a, uint32_t q_a, uint32_t p_b, uint32_t q_b, + uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t k_num = - denum * p_b + (alpha_num - d_num) * q_b; + uint32_t k_denum = - (alpha_num - d_num) * q_a + denum * p_a; + uint32_t k = (k_num / k_denum) + 1; + + *p = p_b + k * p_a; + *q = q_b + k * q_a; +} + +static int +find_best_rational_approximation(uint32_t alpha_num, uint32_t d_num, uint32_t denum, uint32_t *p, uint32_t *q) +{ + uint32_t p_a, q_a, p_b, q_b; + + /* check assumptions on the inputs */ + if (!((0 < d_num) && (d_num < alpha_num) && (alpha_num < denum) && (d_num + alpha_num < denum))) { + return -1; + } + + /* set initial bounds for the search */ + p_a = 0; + q_a = 1; + p_b = 1; + q_b = 1; + + while (1) { + uint32_t new_p_a, new_q_a, new_p_b, new_q_b; + uint32_t x_num, x_denum, x; + int aa, bb; + + /* compute the number of steps to the left */ + x_num = denum * p_b - alpha_num * q_b; + x_denum = - denum * p_a + alpha_num * q_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x-1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_left(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a ; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a ; + new_q_b = q_b + x * q_a; + + p_a = new_p_a ; + q_a = new_q_a; + p_b = new_p_b ; + q_b = new_q_b; + + /* compute the number of steps to the right */ + x_num = alpha_num * q_b - denum * p_b; + x_denum = - alpha_num * q_a + denum * p_a; + x = (x_num + x_denum - 1) / x_denum; /* x = ceil(x_num / x_denum) */ + + /* check whether we have a valid approximation */ + aa = matches(p_b + x * p_a, q_b + x * q_a, alpha_num, d_num, denum); + bb = matches(p_b + (x - 1) * p_a, q_b + (x - 1) * q_a, alpha_num, d_num, denum); + if (aa || bb) { + find_exact_solution_right(p_a, q_a, p_b, q_b, alpha_num, d_num, denum, p, q); + return 0; + } + + /* update the interval */ + new_p_a = p_b + (x - 1) * p_a; + new_q_a = q_b + (x - 1) * q_a; + new_p_b = p_b + x * p_a; + new_q_b = q_b + x * q_a; + + p_a = new_p_a; + q_a = new_q_a; + p_b = new_p_b; + q_b = new_q_b; + } +} + +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q) +{ + uint32_t alpha_num, d_num, denum; + + /* Check input arguments */ + if (!((0.0 < d) && (d < alpha) && (alpha < 1.0))) { + return -1; + } + + if ((p == NULL) || (q == NULL)) { + return -2; + } + + /* Compute alpha_num, d_num and denum */ + denum = 1; + while (d < 1) { + alpha *= 10; + d *= 10; + denum *= 10; + } + alpha_num = (uint32_t) alpha; + d_num = (uint32_t) d; + + /* Perform approximation */ + return find_best_rational_approximation(alpha_num, d_num, denum, p, q); +} diff --git a/lib/librte_sched/rte_approx.h b/lib/librte_sched/rte_approx.h new file mode 100644 index 00000000..09f30a87 --- /dev/null +++ b/lib/librte_sched/rte_approx.h @@ -0,0 +1,75 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_APPROX_H__ +#define __INCLUDE_RTE_APPROX_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Rational Approximation + * + * Given a rational number alpha with 0 < alpha < 1 and a precision d, the goal + * is to find positive integers p, q such that alpha - d < p/q < alpha + d, and + * q is minimal. + * + ***/ + +#include + +/** + * Find best rational approximation + * + * @param alpha + * Rational number to approximate + * @param d + * Precision for the rational approximation + * @param p + * Pointer to pre-allocated space where the numerator of the rational + * approximation will be stored when operation is successful + * @param q + * Pointer to pre-allocated space where the denominator of the rational + * approximation will be stored when operation is successful + * @return + * 0 upon success, error code otherwise + */ +int rte_approx(double alpha, double d, uint32_t *p, uint32_t *q); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_APPROX_H__ */ diff --git a/lib/librte_sched/rte_bitmap.h b/lib/librte_sched/rte_bitmap.h new file mode 100644 index 00000000..ff675c58 --- /dev/null +++ b/lib/librte_sched/rte_bitmap.h @@ -0,0 +1,560 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_BITMAP_H__ +#define __INCLUDE_RTE_BITMAP_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Bitmap + * + * The bitmap component provides a mechanism to manage large arrays of bits + * through bit get/set/clear and bit array scan operations. + * + * The bitmap scan operation is optimized for 64-bit CPUs using 64/128 byte cache + * lines. The bitmap is hierarchically organized using two arrays (array1 and + * array2), with each bit in array1 being associated with a full cache line + * (512/1024 bits) of bitmap bits, which are stored in array2: the bit in array1 + * is set only when there is at least one bit set within its associated array2 + * bits, otherwise the bit in array1 is cleared. The read and write operations + * for array1 and array2 are always done in slabs of 64 bits. + * + * This bitmap is not thread safe. For lock free operation on a specific bitmap + * instance, a single writer thread performing bit set/clear operations is + * allowed, only the writer thread can do bitmap scan operations, while there + * can be several reader threads performing bit get operations in parallel with + * the writer thread. When the use of locking primitives is acceptable, the + * serialization of the bit set/clear and bitmap scan operations needs to be + * enforced by the caller, while the bit get operation does not require locking + * the bitmap. + * + ***/ + +#include +#include +#include +#include +#include + +#ifndef RTE_BITMAP_OPTIMIZATIONS +#define RTE_BITMAP_OPTIMIZATIONS 1 +#endif + +/* Slab */ +#define RTE_BITMAP_SLAB_BIT_SIZE 64 +#define RTE_BITMAP_SLAB_BIT_SIZE_LOG2 6 +#define RTE_BITMAP_SLAB_BIT_MASK (RTE_BITMAP_SLAB_BIT_SIZE - 1) + +/* Cache line (CL) */ +#define RTE_BITMAP_CL_BIT_SIZE (RTE_CACHE_LINE_SIZE * 8) +#define RTE_BITMAP_CL_BIT_SIZE_LOG2 (RTE_CACHE_LINE_SIZE_LOG2 + 3) +#define RTE_BITMAP_CL_BIT_MASK (RTE_BITMAP_CL_BIT_SIZE - 1) + +#define RTE_BITMAP_CL_SLAB_SIZE (RTE_BITMAP_CL_BIT_SIZE / RTE_BITMAP_SLAB_BIT_SIZE) +#define RTE_BITMAP_CL_SLAB_SIZE_LOG2 (RTE_BITMAP_CL_BIT_SIZE_LOG2 - RTE_BITMAP_SLAB_BIT_SIZE_LOG2) +#define RTE_BITMAP_CL_SLAB_MASK (RTE_BITMAP_CL_SLAB_SIZE - 1) + +/** Bitmap data structure */ +struct rte_bitmap { + /* Context for array1 and array2 */ + uint64_t *array1; /**< Bitmap array1 */ + uint64_t *array2; /**< Bitmap array2 */ + uint32_t array1_size; /**< Number of 64-bit slabs in array1 that are actually used */ + uint32_t array2_size; /**< Number of 64-bit slabs in array2 */ + + /* Context for the "scan next" operation */ + uint32_t index1; /**< Bitmap scan: Index of current array1 slab */ + uint32_t offset1; /**< Bitmap scan: Offset of current bit within current array1 slab */ + uint32_t index2; /**< Bitmap scan: Index of current array2 slab */ + uint32_t go2; /**< Bitmap scan: Go/stop condition for current array2 cache line */ + + /* Storage space for array1 and array2 */ + uint8_t memory[0]; +}; + +static inline void +__rte_bitmap_index1_inc(struct rte_bitmap *bmp) +{ + bmp->index1 = (bmp->index1 + 1) & (bmp->array1_size - 1); +} + +static inline uint64_t +__rte_bitmap_mask1_get(struct rte_bitmap *bmp) +{ + return (~1lu) << bmp->offset1; +} + +static inline void +__rte_bitmap_index2_set(struct rte_bitmap *bmp) +{ + bmp->index2 = (((bmp->index1 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2) + bmp->offset1) << RTE_BITMAP_CL_SLAB_SIZE_LOG2); +} + +#if RTE_BITMAP_OPTIMIZATIONS + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + if (likely(slab == 0)) { + return 0; + } + + *pos = __builtin_ctzll(slab); + return 1; +} + +#else + +static inline int +rte_bsf64(uint64_t slab, uint32_t *pos) +{ + uint64_t mask; + uint32_t i; + + if (likely(slab == 0)) { + return 0; + } + + for (i = 0, mask = 1; i < RTE_BITMAP_SLAB_BIT_SIZE; i ++, mask <<= 1) { + if (unlikely(slab & mask)) { + *pos = i; + return 1; + } + } + + return 0; +} + +#endif + +static inline uint32_t +__rte_bitmap_get_memory_footprint(uint32_t n_bits, + uint32_t *array1_byte_offset, uint32_t *array1_slabs, + uint32_t *array2_byte_offset, uint32_t *array2_slabs) +{ + uint32_t n_slabs_context, n_slabs_array1, n_cache_lines_context_and_array1; + uint32_t n_cache_lines_array2; + uint32_t n_bytes_total; + + n_cache_lines_array2 = (n_bits + RTE_BITMAP_CL_BIT_SIZE - 1) / RTE_BITMAP_CL_BIT_SIZE; + n_slabs_array1 = (n_cache_lines_array2 + RTE_BITMAP_SLAB_BIT_SIZE - 1) / RTE_BITMAP_SLAB_BIT_SIZE; + n_slabs_array1 = rte_align32pow2(n_slabs_array1); + n_slabs_context = (sizeof(struct rte_bitmap) + (RTE_BITMAP_SLAB_BIT_SIZE / 8) - 1) / (RTE_BITMAP_SLAB_BIT_SIZE / 8); + n_cache_lines_context_and_array1 = (n_slabs_context + n_slabs_array1 + RTE_BITMAP_CL_SLAB_SIZE - 1) / RTE_BITMAP_CL_SLAB_SIZE; + n_bytes_total = (n_cache_lines_context_and_array1 + n_cache_lines_array2) * RTE_CACHE_LINE_SIZE; + + if (array1_byte_offset) { + *array1_byte_offset = n_slabs_context * (RTE_BITMAP_SLAB_BIT_SIZE / 8); + } + if (array1_slabs) { + *array1_slabs = n_slabs_array1; + } + if (array2_byte_offset) { + *array2_byte_offset = n_cache_lines_context_and_array1 * RTE_CACHE_LINE_SIZE; + } + if (array2_slabs) { + *array2_slabs = n_cache_lines_array2 * RTE_BITMAP_CL_SLAB_SIZE; + } + + return n_bytes_total; +} + +static inline void +__rte_bitmap_scan_init(struct rte_bitmap *bmp) +{ + bmp->index1 = bmp->array1_size - 1; + bmp->offset1 = RTE_BITMAP_SLAB_BIT_SIZE - 1; + __rte_bitmap_index2_set(bmp); + bmp->index2 += RTE_BITMAP_CL_SLAB_SIZE; + + bmp->go2 = 0; +} + +/** + * Bitmap memory footprint calculation + * + * @param n_bits + * Number of bits in the bitmap + * @return + * Bitmap memory footprint measured in bytes on success, 0 on error + */ +static inline uint32_t +rte_bitmap_get_memory_footprint(uint32_t n_bits) { + /* Check input arguments */ + if (n_bits == 0) { + return 0; + } + + return __rte_bitmap_get_memory_footprint(n_bits, NULL, NULL, NULL, NULL); +} + +/** + * Bitmap initialization + * + * @param mem_size + * Minimum expected size of bitmap. + * @param mem + * Base address of array1 and array2. + * @param n_bits + * Number of pre-allocated bits in array2. Must be non-zero and multiple of 512. + * @return + * Handle to bitmap instance. + */ +static inline struct rte_bitmap * +rte_bitmap_init(uint32_t n_bits, uint8_t *mem, uint32_t mem_size) +{ + struct rte_bitmap *bmp; + uint32_t array1_byte_offset, array1_slabs, array2_byte_offset, array2_slabs; + uint32_t size; + + /* Check input arguments */ + if (n_bits == 0) { + return NULL; + } + + if ((mem == NULL) || (((uintptr_t) mem) & RTE_CACHE_LINE_MASK)) { + return NULL; + } + + size = __rte_bitmap_get_memory_footprint(n_bits, + &array1_byte_offset, &array1_slabs, + &array2_byte_offset, &array2_slabs); + if (size < mem_size) { + return NULL; + } + + /* Setup bitmap */ + memset(mem, 0, size); + bmp = (struct rte_bitmap *) mem; + + bmp->array1 = (uint64_t *) &mem[array1_byte_offset]; + bmp->array1_size = array1_slabs; + bmp->array2 = (uint64_t *) &mem[array2_byte_offset]; + bmp->array2_size = array2_slabs; + + __rte_bitmap_scan_init(bmp); + + return bmp; +} + +/** + * Bitmap free + * + * @param bmp + * Handle to bitmap instance + * @return + * 0 upon success, error code otherwise + */ +static inline int +rte_bitmap_free(struct rte_bitmap *bmp) +{ + /* Check input arguments */ + if (bmp == NULL) { + return -1; + } + + return 0; +} + +/** + * Bitmap reset + * + * @param bmp + * Handle to bitmap instance + */ +static inline void +rte_bitmap_reset(struct rte_bitmap *bmp) +{ + memset(bmp->array1, 0, bmp->array1_size * sizeof(uint64_t)); + memset(bmp->array2, 0, bmp->array2_size * sizeof(uint64_t)); + __rte_bitmap_scan_init(bmp); +} + +/** + * Bitmap location prefetch into CPU L1 cache + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 upon success, error code otherwise + */ +static inline void +rte_bitmap_prefetch0(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + slab2 = bmp->array2 + index2; + rte_prefetch0((void *) slab2); +} + +/** + * Bitmap bit get + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + * @return + * 0 when bit is cleared, non-zero when bit is set + */ +static inline uint64_t +rte_bitmap_get(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab2; + uint32_t index2, offset2; + + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + return (*slab2) & (1lu << offset2); +} + +/** + * Bitmap bit set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_set(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Set bit in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= 1lu << offset2; + *slab1 |= 1lu << offset1; +} + +/** + * Bitmap slab set + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position identifying the array2 slab + * @param slab + * Value to be assigned to the 64-bit slab in array2 + */ +static inline void +rte_bitmap_set_slab(struct rte_bitmap *bmp, uint32_t pos, uint64_t slab) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1; + + /* Set bits in array2 slab and set bit in array1 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + slab1 = bmp->array1 + index1; + + *slab2 |= slab; + *slab1 |= 1lu << offset1; +} + +static inline uint64_t +__rte_bitmap_line_not_empty(uint64_t *slab2) +{ + uint64_t v1, v2, v3, v4; + + v1 = slab2[0] | slab2[1]; + v2 = slab2[2] | slab2[3]; + v3 = slab2[4] | slab2[5]; + v4 = slab2[6] | slab2[7]; + v1 |= v2; + v3 |= v4; + + return v1 | v3; +} + +/** + * Bitmap bit clear + * + * @param bmp + * Handle to bitmap instance + * @param pos + * Bit position + */ +static inline void +rte_bitmap_clear(struct rte_bitmap *bmp, uint32_t pos) +{ + uint64_t *slab1, *slab2; + uint32_t index1, index2, offset1, offset2; + + /* Clear bit in array2 slab */ + index2 = pos >> RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + offset2 = pos & RTE_BITMAP_SLAB_BIT_MASK; + slab2 = bmp->array2 + index2; + + /* Return if array2 slab is not all-zeros */ + *slab2 &= ~(1lu << offset2); + if (*slab2){ + return; + } + + /* Check the entire cache line of array2 for all-zeros */ + index2 &= ~ RTE_BITMAP_CL_SLAB_MASK; + slab2 = bmp->array2 + index2; + if (__rte_bitmap_line_not_empty(slab2)) { + return; + } + + /* The array2 cache line is all-zeros, so clear bit in array1 slab */ + index1 = pos >> (RTE_BITMAP_SLAB_BIT_SIZE_LOG2 + RTE_BITMAP_CL_BIT_SIZE_LOG2); + offset1 = (pos >> RTE_BITMAP_CL_BIT_SIZE_LOG2) & RTE_BITMAP_SLAB_BIT_MASK; + slab1 = bmp->array1 + index1; + *slab1 &= ~(1lu << offset1); + + return; +} + +static inline int +__rte_bitmap_scan_search(struct rte_bitmap *bmp) +{ + uint64_t value1; + uint32_t i; + + /* Check current array1 slab */ + value1 = bmp->array1[bmp->index1]; + value1 &= __rte_bitmap_mask1_get(bmp); + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + + __rte_bitmap_index1_inc(bmp); + bmp->offset1 = 0; + + /* Look for another array1 slab */ + for (i = 0; i < bmp->array1_size; i ++, __rte_bitmap_index1_inc(bmp)) { + value1 = bmp->array1[bmp->index1]; + + if (rte_bsf64(value1, &bmp->offset1)) { + return 1; + } + } + + return 0; +} + +static inline void +__rte_bitmap_scan_read_init(struct rte_bitmap *bmp) +{ + __rte_bitmap_index2_set(bmp); + bmp->go2 = 1; + rte_prefetch1((void *)(bmp->array2 + bmp->index2 + 8)); +} + +static inline int +__rte_bitmap_scan_read(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + uint64_t *slab2; + + slab2 = bmp->array2 + bmp->index2; + for ( ; bmp->go2 ; bmp->index2 ++, slab2 ++, bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK) { + if (*slab2) { + *pos = bmp->index2 << RTE_BITMAP_SLAB_BIT_SIZE_LOG2; + *slab = *slab2; + + bmp->index2 ++; + slab2 ++; + bmp->go2 = bmp->index2 & RTE_BITMAP_CL_SLAB_MASK; + return 1; + } + } + + return 0; +} + +/** + * Bitmap scan (with automatic wrap-around) + * + * @param bmp + * Handle to bitmap instance + * @param pos + * When function call returns 1, pos contains the position of the next set + * bit, otherwise not modified + * @param slab + * When function call returns 1, slab contains the value of the entire 64-bit + * slab where the bit indicated by pos is located. Slabs are always 64-bit + * aligned, so the position of the first bit of the slab (this bit is not + * necessarily set) is pos / 64. Once a slab has been returned by the bitmap + * scan operation, the internal pointers of the bitmap are updated to point + * after this slab, so the same slab will not be returned again if it + * contains more than one bit which is set. When function call returns 0, + * slab is not modified. + * @return + * 0 if there is no bit set in the bitmap, 1 otherwise + */ +static inline int +rte_bitmap_scan(struct rte_bitmap *bmp, uint32_t *pos, uint64_t *slab) +{ + /* Return data from current array2 line if available */ + if (__rte_bitmap_scan_read(bmp, pos, slab)) { + return 1; + } + + /* Look for non-empty array2 line */ + if (__rte_bitmap_scan_search(bmp)) { + __rte_bitmap_scan_read_init(bmp); + __rte_bitmap_scan_read(bmp, pos, slab); + return 1; + } + + /* Empty bitmap */ + return 0; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_BITMAP_H__ */ diff --git a/lib/librte_sched/rte_reciprocal.c b/lib/librte_sched/rte_reciprocal.c new file mode 100644 index 00000000..652f0237 --- /dev/null +++ b/lib/librte_sched/rte_reciprocal.c @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) Hannes Frederic Sowa + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include + +#include "rte_reciprocal.h" + +/* find largest set bit. + * portable and slow but does not matter for this usage. + */ +static inline int fls(uint32_t x) +{ + int b; + + for (b = 31; b >= 0; --b) { + if (x & (1u << b)) + return b + 1; + } + + return 0; +} + +struct rte_reciprocal rte_reciprocal_value(uint32_t d) +{ + struct rte_reciprocal R; + uint64_t m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + m /= d; + + ++m; + R.m = m; + R.sh1 = RTE_MIN(l, 1); + R.sh2 = RTE_MAX(l - 1, 0); + + return R; +} diff --git a/lib/librte_sched/rte_reciprocal.h b/lib/librte_sched/rte_reciprocal.h new file mode 100644 index 00000000..abd15251 --- /dev/null +++ b/lib/librte_sched/rte_reciprocal.h @@ -0,0 +1,39 @@ +/* + * Reciprocal divide + * + * Used with permission from original authors + * Hannes Frederic Sowa and Daniel Borkmann + * + * This algorithm is based on the paper "Division by Invariant + * Integers Using Multiplication" by Torbjörn Granlund and Peter + * L. Montgomery. + * + * The assembler implementation from Agner Fog, which this code is + * based on, can be found here: + * http://www.agner.org/optimize/asmlib.zip + * + * This optimization for A/B is helpful if the divisor B is mostly + * runtime invariant. The reciprocal of B is calculated in the + * slow-path with reciprocal_value(). The fast-path can then just use + * a much faster multiplication operation with a variable dividend A + * to calculate the division A/B. + */ + +#ifndef _RTE_RECIPROCAL_H_ +#define _RTE_RECIPROCAL_H_ + +struct rte_reciprocal { + uint32_t m; + uint8_t sh1, sh2; +}; + +static inline uint32_t rte_reciprocal_divide(uint32_t a, struct rte_reciprocal R) +{ + uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32); + + return (t + ((a - t) >> R.sh1)) >> R.sh2; +} + +struct rte_reciprocal rte_reciprocal_value(uint32_t d); + +#endif /* _RTE_RECIPROCAL_H_ */ diff --git a/lib/librte_sched/rte_red.c b/lib/librte_sched/rte_red.c new file mode 100644 index 00000000..fdf40576 --- /dev/null +++ b/lib/librte_sched/rte_red.c @@ -0,0 +1,158 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include "rte_red.h" +#include +#include + +#ifdef __INTEL_COMPILER +#pragma warning(disable:2259) /* conversion may lose significant bits */ +#endif + +static int rte_red_init_done = 0; /**< Flag to indicate that global initialisation is done */ +uint32_t rte_red_rand_val = 0; /**< Random value cache */ +uint32_t rte_red_rand_seed = 0; /**< Seed for random number generation */ + +/** + * table[i] = log2(1-Wq) * Scale * -1 + * Wq = 1/(2^i) + */ +uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; + +/** + * table[i] = 2^(i/16) * Scale + */ +uint16_t rte_red_pow2_frac_inv[16]; + +/** + * @brief Initialize tables used to compute average + * queue size when queue is empty. + */ +static void +__rte_red_init_tables(void) +{ + uint32_t i = 0; + double scale = 0.0; + double table_size = 0.0; + + scale = (double)(1 << RTE_RED_SCALING); + table_size = (double)(RTE_DIM(rte_red_pow2_frac_inv)); + + for (i = 0; i < RTE_DIM(rte_red_pow2_frac_inv); i++) { + double m = (double)i; + + rte_red_pow2_frac_inv[i] = (uint16_t) round(scale / pow(2, m / table_size)); + } + + scale = 1024.0; + + RTE_RED_ASSERT(RTE_RED_WQ_LOG2_NUM == RTE_DIM(rte_red_log2_1_minus_Wq)); + + for (i = RTE_RED_WQ_LOG2_MIN; i <= RTE_RED_WQ_LOG2_MAX; i++) { + double n = (double)i; + double Wq = pow(2, -n); + uint32_t index = i - RTE_RED_WQ_LOG2_MIN; + + rte_red_log2_1_minus_Wq[index] = (uint16_t) round(-1.0 * scale * log2(1.0 - Wq)); + /** + * Table entry of zero, corresponds to a Wq of zero + * which is not valid (avg would remain constant no + * matter how long the queue is empty). So we have + * to check for zero and round up to one. + */ + if (rte_red_log2_1_minus_Wq[index] == 0) { + rte_red_log2_1_minus_Wq[index] = 1; + } + } +} + +int +rte_red_rt_data_init(struct rte_red *red) +{ + if (red == NULL) + return -1; + + red->avg = 0; + red->count = 0; + red->q_time = 0; + return 0; +} + +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv) +{ + if (red_cfg == NULL) { + return -1; + } + if (max_th > RTE_RED_MAX_TH_MAX) { + return -2; + } + if (min_th >= max_th) { + return -3; + } + if (wq_log2 > RTE_RED_WQ_LOG2_MAX) { + return -4; + } + if (wq_log2 < RTE_RED_WQ_LOG2_MIN) { + return -5; + } + if (maxp_inv < RTE_RED_MAXP_INV_MIN) { + return -6; + } + if (maxp_inv > RTE_RED_MAXP_INV_MAX) { + return -7; + } + + /** + * Initialize the RED module if not already done + */ + if (!rte_red_init_done) { + rte_red_rand_seed = rte_rand(); + rte_red_rand_val = rte_fast_rand(); + __rte_red_init_tables(); + rte_red_init_done = 1; + } + + red_cfg->min_th = ((uint32_t) min_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->max_th = ((uint32_t) max_th) << (wq_log2 + RTE_RED_SCALING); + red_cfg->pa_const = (2 * (max_th - min_th) * maxp_inv) << RTE_RED_SCALING; + red_cfg->maxp_inv = maxp_inv; + red_cfg->wq_log2 = wq_log2; + + return 0; +} diff --git a/lib/librte_sched/rte_red.h b/lib/librte_sched/rte_red.h new file mode 100644 index 00000000..7f9ac901 --- /dev/null +++ b/lib/librte_sched/rte_red.h @@ -0,0 +1,453 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __RTE_RED_H_INCLUDED__ +#define __RTE_RED_H_INCLUDED__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Random Early Detection (RED) + * + * + ***/ + +#include +#include +#include +#include +#include +#include + +#define RTE_RED_SCALING 10 /**< Fraction size for fixed-point */ +#define RTE_RED_S (1 << 22) /**< Packet size multiplied by number of leaf queues */ +#define RTE_RED_MAX_TH_MAX 1023 /**< Max threshold limit in fixed point format */ +#define RTE_RED_WQ_LOG2_MIN 1 /**< Min inverse filter weight value */ +#define RTE_RED_WQ_LOG2_MAX 12 /**< Max inverse filter weight value */ +#define RTE_RED_MAXP_INV_MIN 1 /**< Min inverse mark probability value */ +#define RTE_RED_MAXP_INV_MAX 255 /**< Max inverse mark probability value */ +#define RTE_RED_2POW16 (1<<16) /**< 2 power 16 */ +#define RTE_RED_INT16_NBITS (sizeof(uint16_t) * CHAR_BIT) +#define RTE_RED_WQ_LOG2_NUM (RTE_RED_WQ_LOG2_MAX - RTE_RED_WQ_LOG2_MIN + 1) + +#ifdef RTE_RED_DEBUG + +#define RTE_RED_ASSERT(exp) \ +if (!(exp)) { \ + rte_panic("line%d\tassert \"" #exp "\" failed\n", __LINE__); \ +} + +#else + +#define RTE_RED_ASSERT(exp) do { } while(0) + +#endif /* RTE_RED_DEBUG */ + +/** + * Externs + * + */ +extern uint32_t rte_red_rand_val; +extern uint32_t rte_red_rand_seed; +extern uint16_t rte_red_log2_1_minus_Wq[RTE_RED_WQ_LOG2_NUM]; +extern uint16_t rte_red_pow2_frac_inv[16]; + +/** + * RED configuration parameters passed by user + * + */ +struct rte_red_params { + uint16_t min_th; /**< Minimum threshold for queue (max_th) */ + uint16_t max_th; /**< Maximum threshold for queue (max_th) */ + uint16_t maxp_inv; /**< Inverse of packet marking probability maximum value (maxp = 1 / maxp_inv) */ + uint16_t wq_log2; /**< Negated log2 of queue weight (wq = 1 / (2 ^ wq_log2)) */ +}; + +/** + * RED configuration parameters + */ +struct rte_red_config { + uint32_t min_th; /**< min_th scaled in fixed-point format */ + uint32_t max_th; /**< max_th scaled in fixed-point format */ + uint32_t pa_const; /**< Precomputed constant value used for pa calculation (scaled in fixed-point format) */ + uint8_t maxp_inv; /**< maxp_inv */ + uint8_t wq_log2; /**< wq_log2 */ +}; + +/** + * RED run-time data + */ +struct rte_red { + uint32_t avg; /**< Average queue size (avg), scaled in fixed-point format */ + uint32_t count; /**< Number of packets since last marked packet (count) */ + uint64_t q_time; /**< Start of the queue idle time (q_time) */ +}; + +/** + * @brief Initialises run-time data + * + * @param red [in,out] data pointer to RED runtime data + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_rt_data_init(struct rte_red *red); + +/** + * @brief Configures a single RED configuration parameter structure. + * + * @param red_cfg [in,out] config pointer to a RED configuration parameter structure + * @param wq_log2 [in] log2 of the filter weight, valid range is: + * RTE_RED_WQ_LOG2_MIN <= wq_log2 <= RTE_RED_WQ_LOG2_MAX + * @param min_th [in] queue minimum threshold in number of packets + * @param max_th [in] queue maximum threshold in number of packets + * @param maxp_inv [in] inverse maximum mark probability + * + * @return Operation status + * @retval 0 success + * @retval !0 error + */ +int +rte_red_config_init(struct rte_red_config *red_cfg, + const uint16_t wq_log2, + const uint16_t min_th, + const uint16_t max_th, + const uint16_t maxp_inv); + +/** + * @brief Generate random number for RED + * + * Implemenetation based on: + * http://software.intel.com/en-us/articles/fast-random-number-generator-on-the-intel-pentiumr-4-processor/ + * + * 10 bit shift has been found through empirical tests (was 16). + * + * @return Random number between 0 and (2^22 - 1) + */ +static inline uint32_t +rte_fast_rand(void) +{ + rte_red_rand_seed = (214013 * rte_red_rand_seed) + 2531011; + return rte_red_rand_seed >> 10; +} + +/** + * @brief calculate factor to scale average queue size when queue + * becomes empty + * + * @param wq_log2 [in] where EWMA filter weight wq = 1/(2 ^ wq_log2) + * @param m [in] exponent in the computed value (1 - wq) ^ m + * + * @return computed value + * @retval ((1 - wq) ^ m) scaled in fixed-point format + */ +static inline uint16_t +__rte_red_calc_qempty_factor(uint8_t wq_log2, uint16_t m) +{ + uint32_t n = 0; + uint32_t f = 0; + + /** + * Basic math tells us that: + * a^b = 2^(b * log2(a) ) + * + * in our case: + * a = (1-Wq) + * b = m + * Wq = 1/ (2^log2n) + * + * So we are computing this equation: + * factor = 2 ^ ( m * log2(1-Wq)) + * + * First we are computing: + * n = m * log2(1-Wq) + * + * To avoid dealing with signed numbers log2 values are positive + * but they should be negative because (1-Wq) is always < 1. + * Contents of log2 table values are also scaled for precision. + */ + + n = m * rte_red_log2_1_minus_Wq[wq_log2 - RTE_RED_WQ_LOG2_MIN]; + + /** + * The tricky part is computing 2^n, for this I split n into + * integer part and fraction part. + * f - is fraction part of n + * n - is integer part of original n + * + * Now using basic math we compute 2^n: + * 2^(f+n) = 2^f * 2^n + * 2^f - we use lookup table + * 2^n - can be replaced with bit shift right oeprations + */ + + f = (n >> 6) & 0xf; + n >>= 10; + + if (n < RTE_RED_SCALING) + return (uint16_t) ((rte_red_pow2_frac_inv[f] + (1 << (n - 1))) >> n); + + return 0; +} + +/** + * @brief Updates queue average in condition when queue is empty + * + * Note: packet is never dropped in this particular case. + * + * @param red_cfg [in] config pointer to a RED configuration parameter structure + * @param red [in,out] data pointer to RED runtime data + * @param time [in] current time stamp + * + * @return Operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet based on max threshold criterion + * @retval 2 drop the packet based on mark probability criterion + */ +static inline int +rte_red_enqueue_empty(const struct rte_red_config *red_cfg, + struct rte_red *red, + const uint64_t time) +{ + uint64_t time_diff = 0, m = 0; + + RTE_RED_ASSERT(red_cfg != NULL); + RTE_RED_ASSERT(red != NULL); + + red->count ++; + + /** + * We compute avg but we don't compare avg against + * min_th or max_th, nor calculate drop probability + */ + time_diff = time - red->q_time; + + /** + * m is the number of packets that might have arrived while the queue was empty. + * In this case we have time stamps provided by scheduler in byte units (bytes + * transmitted on network port). Such time stamp translates into time units as + * port speed is fixed but such approach simplifies the code. + */ + m = time_diff / RTE_RED_S; + + /** + * Check that m will fit into 16-bit unsigned integer + */ + if (m >= RTE_RED_2POW16) { + red->avg = 0; + } else { + red->avg = (red->avg >> RTE_RED_SCALING) * __rte_red_calc_qempty_factor(red_cfg->wq_log2, (uint16_t) m); + } + + return 0; +} + +/** + * Drop probability (Sally Floyd and Van Jacobson): + * + * pb = (1 / maxp_inv) * (avg - min_th) / (max_th - min_th) + * pa = pb / (2 - count * pb) + * + * + * (1 / maxp_inv) * (avg - min_th) + * --------------------------------- + * max_th - min_th + * pa = ----------------------------------------------- + * count * (1 / maxp_inv) * (avg - min_th) + * 2 - ----------------------------------------- + * max_th - min_th + * + * + * avg - min_th + * pa = ----------------------------------------------------------- + * 2 * (max_th - min_th) * maxp_inv - count * (avg - min_th) + * + * + * We define pa_const as: pa_const = 2 * (max_th - min_th) * maxp_inv. Then: + * + * + * avg - min_th + * pa = ----------------------------------- + * pa_const - count * (avg - min_th) + */ + +/** + * @brief make a decision to drop or enqueue a packet based on mark probability + * criteria + * + * @param red_cfg [in] config pointer to structure defining RED parameters + * @param red [in,out] data pointer to RED runtime data + * + * @return operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet + */ +static inline int +__rte_red_drop(const struct rte_red_config *red_cfg, struct rte_red *red) +{ + uint32_t pa_num = 0; /* numerator of drop-probability */ + uint32_t pa_den = 0; /* denominator of drop-probability */ + uint32_t pa_num_count = 0; + + pa_num = (red->avg - red_cfg->min_th) >> (red_cfg->wq_log2); + + pa_num_count = red->count * pa_num; + + if (red_cfg->pa_const <= pa_num_count) + return 1; + + pa_den = red_cfg->pa_const - pa_num_count; + + /* If drop, generate and save random number to be used next time */ + if (unlikely((rte_red_rand_val % pa_den) < pa_num)) { + rte_red_rand_val = rte_fast_rand(); + + return 1; + } + + /* No drop */ + return 0; +} + +/** + * @brief Decides if new packet should be enqeued or dropped in queue non-empty case + * + * @param red_cfg [in] config pointer to a RED configuration parameter structure + * @param red [in,out] data pointer to RED runtime data + * @param q [in] current queue size (measured in packets) + * + * @return Operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet based on max threshold criterion + * @retval 2 drop the packet based on mark probability criterion + */ +static inline int +rte_red_enqueue_nonempty(const struct rte_red_config *red_cfg, + struct rte_red *red, + const unsigned q) +{ + RTE_RED_ASSERT(red_cfg != NULL); + RTE_RED_ASSERT(red != NULL); + + /** + * EWMA filter (Sally Floyd and Van Jacobson): + * avg = (1 - wq) * avg + wq * q + * avg = avg + q * wq - avg * wq + * + * We select: wq = 2^(-n). Let scaled version of avg be: avg_s = avg * 2^(N+n). We get: + * avg_s = avg_s + q * 2^N - avg_s * 2^(-n) + * + * By using shift left/right operations, we get: + * avg_s = avg_s + (q << N) - (avg_s >> n) + * avg_s += (q << N) - (avg_s >> n) + */ + + /* avg update */ + red->avg += (q << RTE_RED_SCALING) - (red->avg >> red_cfg->wq_log2); + + /* avg < min_th: do not mark the packet */ + if (red->avg < red_cfg->min_th) { + red->count ++; + return 0; + } + + /* min_th <= avg < max_th: mark the packet with pa probability */ + if (red->avg < red_cfg->max_th) { + if (!__rte_red_drop(red_cfg, red)) { + red->count ++; + return 0; + } + + red->count = 0; + return 2; + } + + /* max_th <= avg: always mark the packet */ + red->count = 0; + return 1; +} + +/** + * @brief Decides if new packet should be enqeued or dropped + * Updates run time data based on new queue size value. + * Based on new queue average and RED configuration parameters + * gives verdict whether to enqueue or drop the packet. + * + * @param red_cfg [in] config pointer to a RED configuration parameter structure + * @param red [in,out] data pointer to RED runtime data + * @param q [in] updated queue size in packets + * @param time [in] current time stamp + * + * @return Operation status + * @retval 0 enqueue the packet + * @retval 1 drop the packet based on max threshold criteria + * @retval 2 drop the packet based on mark probability criteria + */ +static inline int +rte_red_enqueue(const struct rte_red_config *red_cfg, + struct rte_red *red, + const unsigned q, + const uint64_t time) +{ + RTE_RED_ASSERT(red_cfg != NULL); + RTE_RED_ASSERT(red != NULL); + + if (q != 0) { + return rte_red_enqueue_nonempty(red_cfg, red, q); + } else { + return rte_red_enqueue_empty(red_cfg, red, time); + } +} + +/** + * @brief Callback to records time that queue became empty + * + * @param red [in,out] data pointer to RED runtime data + * @param time [in] current time stamp + */ +static inline void +rte_red_mark_queue_empty(struct rte_red *red, const uint64_t time) +{ + red->q_time = time; +} + +#ifdef __cplusplus +} +#endif + +#endif /* __RTE_RED_H_INCLUDED__ */ diff --git a/lib/librte_sched/rte_sched.c b/lib/librte_sched/rte_sched.c new file mode 100644 index 00000000..1609ea87 --- /dev/null +++ b/lib/librte_sched/rte_sched.c @@ -0,0 +1,2156 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_sched.h" +#include "rte_bitmap.h" +#include "rte_sched_common.h" +#include "rte_approx.h" +#include "rte_reciprocal.h" + +#ifdef __INTEL_COMPILER +#pragma warning(disable:2259) /* conversion may lose significant bits */ +#endif + +#ifdef RTE_SCHED_VECTOR +#include + +#if defined(__SSE4__) +#define SCHED_VECTOR_SSE4 +#endif + +#endif + +#define RTE_SCHED_TB_RATE_CONFIG_ERR (1e-7) +#define RTE_SCHED_WRR_SHIFT 3 +#define RTE_SCHED_GRINDER_PCACHE_SIZE (64 / RTE_SCHED_QUEUES_PER_PIPE) +#define RTE_SCHED_PIPE_INVALID UINT32_MAX +#define RTE_SCHED_BMP_POS_INVALID UINT32_MAX + +/* Scaling for cycles_per_byte calculation + * Chosen so that minimum rate is 480 bit/sec + */ +#define RTE_SCHED_TIME_SHIFT 8 + +struct rte_sched_subport { + /* Token bucket (TB) */ + uint64_t tb_time; /* time of last update */ + uint32_t tb_period; + uint32_t tb_credits_per_period; + uint32_t tb_size; + uint32_t tb_credits; + + /* Traffic classes (TCs) */ + uint64_t tc_time; /* time of next update */ + uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_period; + + /* TC oversubscription */ + uint32_t tc_ov_wm; + uint32_t tc_ov_wm_min; + uint32_t tc_ov_wm_max; + uint8_t tc_ov_period_id; + uint8_t tc_ov; + uint32_t tc_ov_n; + double tc_ov_rate; + + /* Statistics */ + struct rte_sched_subport_stats stats; +}; + +struct rte_sched_pipe_profile { + /* Token bucket (TB) */ + uint32_t tb_period; + uint32_t tb_credits_per_period; + uint32_t tb_size; + + /* Pipe traffic classes */ + uint32_t tc_period; + uint32_t tc_credits_per_period[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint8_t tc_ov_weight; + + /* Pipe queues */ + uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_PIPE]; +}; + +struct rte_sched_pipe { + /* Token bucket (TB) */ + uint64_t tb_time; /* time of last update */ + uint32_t tb_credits; + + /* Pipe profile and flags */ + uint32_t profile; + + /* Traffic classes (TCs) */ + uint64_t tc_time; /* time of next update */ + uint32_t tc_credits[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + + /* Weighted Round Robin (WRR) */ + uint8_t wrr_tokens[RTE_SCHED_QUEUES_PER_PIPE]; + + /* TC oversubscription */ + uint32_t tc_ov_credits; + uint8_t tc_ov_period_id; + uint8_t reserved[3]; +} __rte_cache_aligned; + +struct rte_sched_queue { + uint16_t qw; + uint16_t qr; +}; + +struct rte_sched_queue_extra { + struct rte_sched_queue_stats stats; +#ifdef RTE_SCHED_RED + struct rte_red red; +#endif +}; + +enum grinder_state { + e_GRINDER_PREFETCH_PIPE = 0, + e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS, + e_GRINDER_PREFETCH_MBUF, + e_GRINDER_READ_MBUF +}; + +/* + * Path through the scheduler hierarchy used by the scheduler enqueue + * operation to identify the destination queue for the current + * packet. Stored in the field pkt.hash.sched of struct rte_mbuf of + * each packet, typically written by the classification stage and read + * by scheduler enqueue. + */ +struct rte_sched_port_hierarchy { + uint16_t queue:2; /**< Queue ID (0 .. 3) */ + uint16_t traffic_class:2; /**< Traffic class ID (0 .. 3)*/ + uint32_t color:2; /**< Color */ + uint16_t unused:10; + uint16_t subport; /**< Subport ID */ + uint32_t pipe; /**< Pipe ID */ +}; + +struct rte_sched_grinder { + /* Pipe cache */ + uint16_t pcache_qmask[RTE_SCHED_GRINDER_PCACHE_SIZE]; + uint32_t pcache_qindex[RTE_SCHED_GRINDER_PCACHE_SIZE]; + uint32_t pcache_w; + uint32_t pcache_r; + + /* Current pipe */ + enum grinder_state state; + uint32_t productive; + uint32_t pindex; + struct rte_sched_subport *subport; + struct rte_sched_pipe *pipe; + struct rte_sched_pipe_profile *pipe_params; + + /* TC cache */ + uint8_t tccache_qmask[4]; + uint32_t tccache_qindex[4]; + uint32_t tccache_w; + uint32_t tccache_r; + + /* Current TC */ + uint32_t tc_index; + struct rte_sched_queue *queue[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + struct rte_mbuf **qbase[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t qindex[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint16_t qsize; + uint32_t qmask; + uint32_t qpos; + struct rte_mbuf *pkt; + + /* WRR */ + uint16_t wrr_tokens[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint16_t wrr_mask[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint8_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; +}; + +struct rte_sched_port { + /* User parameters */ + uint32_t n_subports_per_port; + uint32_t n_pipes_per_subport; + uint32_t rate; + uint32_t mtu; + uint32_t frame_overhead; + uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t n_pipe_profiles; + uint32_t pipe_tc3_rate_max; +#ifdef RTE_SCHED_RED + struct rte_red_config red_config[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; +#endif + + /* Timing */ + uint64_t time_cpu_cycles; /* Current CPU time measured in CPU cyles */ + uint64_t time_cpu_bytes; /* Current CPU time measured in bytes */ + uint64_t time; /* Current NIC TX time measured in bytes */ + struct rte_reciprocal inv_cycles_per_byte; /* CPU cycles per byte */ + + /* Scheduling loop detection */ + uint32_t pipe_loop; + uint32_t pipe_exhaustion; + + /* Bitmap */ + struct rte_bitmap *bmp; + uint32_t grinder_base_bmp_pos[RTE_SCHED_PORT_N_GRINDERS] __rte_aligned_16; + + /* Grinders */ + struct rte_sched_grinder grinder[RTE_SCHED_PORT_N_GRINDERS]; + uint32_t busy_grinders; + struct rte_mbuf **pkts_out; + uint32_t n_pkts_out; + + /* Queue base calculation */ + uint32_t qsize_add[RTE_SCHED_QUEUES_PER_PIPE]; + uint32_t qsize_sum; + + /* Large data structures */ + struct rte_sched_subport *subport; + struct rte_sched_pipe *pipe; + struct rte_sched_queue *queue; + struct rte_sched_queue_extra *queue_extra; + struct rte_sched_pipe_profile *pipe_profiles; + uint8_t *bmp_array; + struct rte_mbuf **queue_array; + uint8_t memory[0] __rte_cache_aligned; +} __rte_cache_aligned; + +enum rte_sched_port_array { + e_RTE_SCHED_PORT_ARRAY_SUBPORT = 0, + e_RTE_SCHED_PORT_ARRAY_PIPE, + e_RTE_SCHED_PORT_ARRAY_QUEUE, + e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA, + e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES, + e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY, + e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY, + e_RTE_SCHED_PORT_ARRAY_TOTAL, +}; + +#ifdef RTE_SCHED_COLLECT_STATS + +static inline uint32_t +rte_sched_port_queues_per_subport(struct rte_sched_port *port) +{ + return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport; +} + +#endif + +static inline uint32_t +rte_sched_port_queues_per_port(struct rte_sched_port *port) +{ + return RTE_SCHED_QUEUES_PER_PIPE * port->n_pipes_per_subport * port->n_subports_per_port; +} + +static inline struct rte_mbuf ** +rte_sched_port_qbase(struct rte_sched_port *port, uint32_t qindex) +{ + uint32_t pindex = qindex >> 4; + uint32_t qpos = qindex & 0xF; + + return (port->queue_array + pindex * + port->qsize_sum + port->qsize_add[qpos]); +} + +static inline uint16_t +rte_sched_port_qsize(struct rte_sched_port *port, uint32_t qindex) +{ + uint32_t tc = (qindex >> 2) & 0x3; + + return port->qsize[tc]; +} + +static int +rte_sched_port_check_params(struct rte_sched_port_params *params) +{ + uint32_t i, j; + + if (params == NULL) + return -1; + + /* socket */ + if ((params->socket < 0) || (params->socket >= RTE_MAX_NUMA_NODES)) + return -3; + + /* rate */ + if (params->rate == 0) + return -4; + + /* mtu */ + if (params->mtu == 0) + return -5; + + /* n_subports_per_port: non-zero, limited to 16 bits, power of 2 */ + if (params->n_subports_per_port == 0 || + params->n_subports_per_port > 1u << 16 || + !rte_is_power_of_2(params->n_subports_per_port)) + return -6; + + /* n_pipes_per_subport: non-zero, power of 2 */ + if (params->n_pipes_per_subport == 0 || + !rte_is_power_of_2(params->n_pipes_per_subport)) + return -7; + + /* qsize: non-zero, power of 2, + * no bigger than 32K (due to 16-bit read/write pointers) + */ + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + uint16_t qsize = params->qsize[i]; + + if (qsize == 0 || !rte_is_power_of_2(qsize)) + return -8; + } + + /* pipe_profiles and n_pipe_profiles */ + if (params->pipe_profiles == NULL || + params->n_pipe_profiles == 0 || + params->n_pipe_profiles > RTE_SCHED_PIPE_PROFILES_PER_PORT) + return -9; + + for (i = 0; i < params->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *p = params->pipe_profiles + i; + + /* TB rate: non-zero, not greater than port rate */ + if (p->tb_rate == 0 || p->tb_rate > params->rate) + return -10; + + /* TB size: non-zero */ + if (p->tb_size == 0) + return -11; + + /* TC rate: non-zero, less than pipe rate */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + if (p->tc_rate[j] == 0 || p->tc_rate[j] > p->tb_rate) + return -12; + } + + /* TC period: non-zero */ + if (p->tc_period == 0) + return -13; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC3 oversubscription weight: non-zero */ + if (p->tc_ov_weight == 0) + return -14; +#endif + + /* Queue WRR weights: non-zero */ + for (j = 0; j < RTE_SCHED_QUEUES_PER_PIPE; j++) { + if (p->wrr_weights[j] == 0) + return -15; + } + } + + return 0; +} + +static uint32_t +rte_sched_port_get_array_base(struct rte_sched_port_params *params, enum rte_sched_port_array array) +{ + uint32_t n_subports_per_port = params->n_subports_per_port; + uint32_t n_pipes_per_subport = params->n_pipes_per_subport; + uint32_t n_pipes_per_port = n_pipes_per_subport * n_subports_per_port; + uint32_t n_queues_per_port = RTE_SCHED_QUEUES_PER_PIPE * n_pipes_per_subport * n_subports_per_port; + + uint32_t size_subport = n_subports_per_port * sizeof(struct rte_sched_subport); + uint32_t size_pipe = n_pipes_per_port * sizeof(struct rte_sched_pipe); + uint32_t size_queue = n_queues_per_port * sizeof(struct rte_sched_queue); + uint32_t size_queue_extra + = n_queues_per_port * sizeof(struct rte_sched_queue_extra); + uint32_t size_pipe_profiles + = RTE_SCHED_PIPE_PROFILES_PER_PORT * sizeof(struct rte_sched_pipe_profile); + uint32_t size_bmp_array = rte_bitmap_get_memory_footprint(n_queues_per_port); + uint32_t size_per_pipe_queue_array, size_queue_array; + + uint32_t base, i; + + size_per_pipe_queue_array = 0; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + size_per_pipe_queue_array += RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + * params->qsize[i] * sizeof(struct rte_mbuf *); + } + size_queue_array = n_pipes_per_port * size_per_pipe_queue_array; + + base = 0; + + if (array == e_RTE_SCHED_PORT_ARRAY_SUBPORT) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_subport); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_pipe); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_queue); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_queue_extra); + + if (array == e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_pipe_profiles); + + if (array == e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_bmp_array); + + if (array == e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY) + return base; + base += RTE_CACHE_LINE_ROUNDUP(size_queue_array); + + return base; +} + +uint32_t +rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params) +{ + uint32_t size0, size1; + int status; + + status = rte_sched_port_check_params(params); + if (status != 0) { + RTE_LOG(NOTICE, SCHED, + "Port scheduler params check failed (%d)\n", status); + + return 0; + } + + size0 = sizeof(struct rte_sched_port); + size1 = rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_TOTAL); + + return size0 + size1; +} + +static void +rte_sched_port_config_qsize(struct rte_sched_port *port) +{ + /* TC 0 */ + port->qsize_add[0] = 0; + port->qsize_add[1] = port->qsize_add[0] + port->qsize[0]; + port->qsize_add[2] = port->qsize_add[1] + port->qsize[0]; + port->qsize_add[3] = port->qsize_add[2] + port->qsize[0]; + + /* TC 1 */ + port->qsize_add[4] = port->qsize_add[3] + port->qsize[0]; + port->qsize_add[5] = port->qsize_add[4] + port->qsize[1]; + port->qsize_add[6] = port->qsize_add[5] + port->qsize[1]; + port->qsize_add[7] = port->qsize_add[6] + port->qsize[1]; + + /* TC 2 */ + port->qsize_add[8] = port->qsize_add[7] + port->qsize[1]; + port->qsize_add[9] = port->qsize_add[8] + port->qsize[2]; + port->qsize_add[10] = port->qsize_add[9] + port->qsize[2]; + port->qsize_add[11] = port->qsize_add[10] + port->qsize[2]; + + /* TC 3 */ + port->qsize_add[12] = port->qsize_add[11] + port->qsize[2]; + port->qsize_add[13] = port->qsize_add[12] + port->qsize[3]; + port->qsize_add[14] = port->qsize_add[13] + port->qsize[3]; + port->qsize_add[15] = port->qsize_add[14] + port->qsize[3]; + + port->qsize_sum = port->qsize_add[15] + port->qsize[3]; +} + +static void +rte_sched_port_log_pipe_profile(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_pipe_profile *p = port->pipe_profiles + i; + + RTE_LOG(DEBUG, SCHED, "Low level config for pipe profile %u:\n" + " Token bucket: period = %u, credits per period = %u, size = %u\n" + " Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n" + " Traffic class 3 oversubscription: weight = %hhu\n" + " WRR cost: [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu], [%hhu, %hhu, %hhu, %hhu]\n", + i, + + /* Token bucket */ + p->tb_period, + p->tb_credits_per_period, + p->tb_size, + + /* Traffic classes */ + p->tc_period, + p->tc_credits_per_period[0], + p->tc_credits_per_period[1], + p->tc_credits_per_period[2], + p->tc_credits_per_period[3], + + /* Traffic class 3 oversubscription */ + p->tc_ov_weight, + + /* WRR */ + p->wrr_cost[ 0], p->wrr_cost[ 1], p->wrr_cost[ 2], p->wrr_cost[ 3], + p->wrr_cost[ 4], p->wrr_cost[ 5], p->wrr_cost[ 6], p->wrr_cost[ 7], + p->wrr_cost[ 8], p->wrr_cost[ 9], p->wrr_cost[10], p->wrr_cost[11], + p->wrr_cost[12], p->wrr_cost[13], p->wrr_cost[14], p->wrr_cost[15]); +} + +static inline uint64_t +rte_sched_time_ms_to_bytes(uint32_t time_ms, uint32_t rate) +{ + uint64_t time = time_ms; + + time = (time * rate) / 1000; + + return time; +} + +static void +rte_sched_port_config_pipe_profile_table(struct rte_sched_port *port, struct rte_sched_port_params *params) +{ + uint32_t i, j; + + for (i = 0; i < port->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *src = params->pipe_profiles + i; + struct rte_sched_pipe_profile *dst = port->pipe_profiles + i; + + /* Token Bucket */ + if (src->tb_rate == params->rate) { + dst->tb_credits_per_period = 1; + dst->tb_period = 1; + } else { + double tb_rate = (double) src->tb_rate + / (double) params->rate; + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, + &dst->tb_credits_per_period, &dst->tb_period); + } + dst->tb_size = src->tb_size; + + /* Traffic Classes */ + dst->tc_period = rte_sched_time_ms_to_bytes(src->tc_period, + params->rate); + + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) + dst->tc_credits_per_period[j] + = rte_sched_time_ms_to_bytes(src->tc_period, + src->tc_rate[j]); + +#ifdef RTE_SCHED_SUBPORT_TC_OV + dst->tc_ov_weight = src->tc_ov_weight; +#endif + + /* WRR */ + for (j = 0; j < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; j++) { + uint32_t wrr_cost[RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS]; + uint32_t lcd, lcd1, lcd2; + uint32_t qindex; + + qindex = j * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS; + + wrr_cost[0] = src->wrr_weights[qindex]; + wrr_cost[1] = src->wrr_weights[qindex + 1]; + wrr_cost[2] = src->wrr_weights[qindex + 2]; + wrr_cost[3] = src->wrr_weights[qindex + 3]; + + lcd1 = rte_get_lcd(wrr_cost[0], wrr_cost[1]); + lcd2 = rte_get_lcd(wrr_cost[2], wrr_cost[3]); + lcd = rte_get_lcd(lcd1, lcd2); + + wrr_cost[0] = lcd / wrr_cost[0]; + wrr_cost[1] = lcd / wrr_cost[1]; + wrr_cost[2] = lcd / wrr_cost[2]; + wrr_cost[3] = lcd / wrr_cost[3]; + + dst->wrr_cost[qindex] = (uint8_t) wrr_cost[0]; + dst->wrr_cost[qindex + 1] = (uint8_t) wrr_cost[1]; + dst->wrr_cost[qindex + 2] = (uint8_t) wrr_cost[2]; + dst->wrr_cost[qindex + 3] = (uint8_t) wrr_cost[3]; + } + + rte_sched_port_log_pipe_profile(port, i); + } + + port->pipe_tc3_rate_max = 0; + for (i = 0; i < port->n_pipe_profiles; i++) { + struct rte_sched_pipe_params *src = params->pipe_profiles + i; + uint32_t pipe_tc3_rate = src->tc_rate[3]; + + if (port->pipe_tc3_rate_max < pipe_tc3_rate) + port->pipe_tc3_rate_max = pipe_tc3_rate; + } +} + +struct rte_sched_port * +rte_sched_port_config(struct rte_sched_port_params *params) +{ + struct rte_sched_port *port = NULL; + uint32_t mem_size, bmp_mem_size, n_queues_per_port, i, cycles_per_byte; + + /* Check user parameters. Determine the amount of memory to allocate */ + mem_size = rte_sched_port_get_memory_footprint(params); + if (mem_size == 0) + return NULL; + + /* Allocate memory to store the data structures */ + port = rte_zmalloc("qos_params", mem_size, RTE_CACHE_LINE_SIZE); + if (port == NULL) + return NULL; + + /* compile time checks */ + RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS == 0); + RTE_BUILD_BUG_ON(RTE_SCHED_PORT_N_GRINDERS & (RTE_SCHED_PORT_N_GRINDERS - 1)); + + /* User parameters */ + port->n_subports_per_port = params->n_subports_per_port; + port->n_pipes_per_subport = params->n_pipes_per_subport; + port->rate = params->rate; + port->mtu = params->mtu + params->frame_overhead; + port->frame_overhead = params->frame_overhead; + memcpy(port->qsize, params->qsize, sizeof(params->qsize)); + port->n_pipe_profiles = params->n_pipe_profiles; + +#ifdef RTE_SCHED_RED + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + uint32_t j; + + for (j = 0; j < e_RTE_METER_COLORS; j++) { + /* if min/max are both zero, then RED is disabled */ + if ((params->red_params[i][j].min_th | + params->red_params[i][j].max_th) == 0) { + continue; + } + + if (rte_red_config_init(&port->red_config[i][j], + params->red_params[i][j].wq_log2, + params->red_params[i][j].min_th, + params->red_params[i][j].max_th, + params->red_params[i][j].maxp_inv) != 0) { + return NULL; + } + } + } +#endif + + /* Timing */ + port->time_cpu_cycles = rte_get_tsc_cycles(); + port->time_cpu_bytes = 0; + port->time = 0; + + cycles_per_byte = (rte_get_tsc_hz() << RTE_SCHED_TIME_SHIFT) + / params->rate; + port->inv_cycles_per_byte = rte_reciprocal_value(cycles_per_byte); + + /* Scheduling loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + port->pipe_exhaustion = 0; + + /* Grinders */ + port->busy_grinders = 0; + port->pkts_out = NULL; + port->n_pkts_out = 0; + + /* Queue base calculation */ + rte_sched_port_config_qsize(port); + + /* Large data structures */ + port->subport = (struct rte_sched_subport *) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_SUBPORT)); + port->pipe = (struct rte_sched_pipe *) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_PIPE)); + port->queue = (struct rte_sched_queue *) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_QUEUE)); + port->queue_extra = (struct rte_sched_queue_extra *) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_QUEUE_EXTRA)); + port->pipe_profiles = (struct rte_sched_pipe_profile *) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_PIPE_PROFILES)); + port->bmp_array = port->memory + + rte_sched_port_get_array_base(params, e_RTE_SCHED_PORT_ARRAY_BMP_ARRAY); + port->queue_array = (struct rte_mbuf **) + (port->memory + rte_sched_port_get_array_base(params, + e_RTE_SCHED_PORT_ARRAY_QUEUE_ARRAY)); + + /* Pipe profile table */ + rte_sched_port_config_pipe_profile_table(port, params); + + /* Bitmap */ + n_queues_per_port = rte_sched_port_queues_per_port(port); + bmp_mem_size = rte_bitmap_get_memory_footprint(n_queues_per_port); + port->bmp = rte_bitmap_init(n_queues_per_port, port->bmp_array, + bmp_mem_size); + if (port->bmp == NULL) { + RTE_LOG(ERR, SCHED, "Bitmap init error\n"); + return NULL; + } + + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) + port->grinder_base_bmp_pos[i] = RTE_SCHED_PIPE_INVALID; + + + return port; +} + +void +rte_sched_port_free(struct rte_sched_port *port) +{ + unsigned int queue; + + /* Check user parameters */ + if (port == NULL) + return; + + /* Free enqueued mbufs */ + for (queue = 0; queue < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; queue++) { + struct rte_mbuf **mbufs = rte_sched_port_qbase(port, queue); + unsigned int i; + + for (i = 0; i < rte_sched_port_qsize(port, queue); i++) + rte_pktmbuf_free(mbufs[i]); + } + + rte_bitmap_free(port->bmp); + rte_free(port); +} + +static void +rte_sched_port_log_subport_config(struct rte_sched_port *port, uint32_t i) +{ + struct rte_sched_subport *s = port->subport + i; + + RTE_LOG(DEBUG, SCHED, "Low level config for subport %u:\n" + " Token bucket: period = %u, credits per period = %u, size = %u\n" + " Traffic classes: period = %u, credits per period = [%u, %u, %u, %u]\n" + " Traffic class 3 oversubscription: wm min = %u, wm max = %u\n", + i, + + /* Token bucket */ + s->tb_period, + s->tb_credits_per_period, + s->tb_size, + + /* Traffic classes */ + s->tc_period, + s->tc_credits_per_period[0], + s->tc_credits_per_period[1], + s->tc_credits_per_period[2], + s->tc_credits_per_period[3], + + /* Traffic class 3 oversubscription */ + s->tc_ov_wm_min, + s->tc_ov_wm_max); +} + +int +rte_sched_subport_config(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_params *params) +{ + struct rte_sched_subport *s; + uint32_t i; + + /* Check user parameters */ + if (port == NULL || + subport_id >= port->n_subports_per_port || + params == NULL) + return -1; + + if (params->tb_rate == 0 || params->tb_rate > port->rate) + return -2; + + if (params->tb_size == 0) + return -3; + + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + if (params->tc_rate[i] == 0 || + params->tc_rate[i] > params->tb_rate) + return -4; + } + + if (params->tc_period == 0) + return -5; + + s = port->subport + subport_id; + + /* Token Bucket (TB) */ + if (params->tb_rate == port->rate) { + s->tb_credits_per_period = 1; + s->tb_period = 1; + } else { + double tb_rate = ((double) params->tb_rate) / ((double) port->rate); + double d = RTE_SCHED_TB_RATE_CONFIG_ERR; + + rte_approx(tb_rate, d, &s->tb_credits_per_period, &s->tb_period); + } + + s->tb_size = params->tb_size; + s->tb_time = port->time; + s->tb_credits = s->tb_size / 2; + + /* Traffic Classes (TCs) */ + s->tc_period = rte_sched_time_ms_to_bytes(params->tc_period, port->rate); + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) { + s->tc_credits_per_period[i] + = rte_sched_time_ms_to_bytes(params->tc_period, + params->tc_rate[i]); + } + s->tc_time = port->time + s->tc_period; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + s->tc_credits[i] = s->tc_credits_per_period[i]; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + /* TC oversubscription */ + s->tc_ov_wm_min = port->mtu; + s->tc_ov_wm_max = rte_sched_time_ms_to_bytes(params->tc_period, + port->pipe_tc3_rate_max); + s->tc_ov_wm = s->tc_ov_wm_max; + s->tc_ov_period_id = 0; + s->tc_ov = 0; + s->tc_ov_n = 0; + s->tc_ov_rate = 0; +#endif + + rte_sched_port_log_subport_config(port, subport_id); + + return 0; +} + +int +rte_sched_pipe_config(struct rte_sched_port *port, + uint32_t subport_id, + uint32_t pipe_id, + int32_t pipe_profile) +{ + struct rte_sched_subport *s; + struct rte_sched_pipe *p; + struct rte_sched_pipe_profile *params; + uint32_t deactivate, profile, i; + + /* Check user parameters */ + profile = (uint32_t) pipe_profile; + deactivate = (pipe_profile < 0); + + if (port == NULL || + subport_id >= port->n_subports_per_port || + pipe_id >= port->n_pipes_per_subport || + (!deactivate && profile >= port->n_pipe_profiles)) + return -1; + + + /* Check that subport configuration is valid */ + s = port->subport + subport_id; + if (s->tb_period == 0) + return -2; + + p = port->pipe + (subport_id * port->n_pipes_per_subport + pipe_id); + + /* Handle the case when pipe already has a valid configuration */ + if (p->tb_time) { + params = port->pipe_profiles + p->profile; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + double subport_tc3_rate = (double) s->tc_credits_per_period[3] + / (double) s->tc_period; + double pipe_tc3_rate = (double) params->tc_credits_per_period[3] + / (double) params->tc_period; + uint32_t tc3_ov = s->tc_ov; + + /* Unplug pipe from its subport */ + s->tc_ov_n -= params->tc_ov_weight; + s->tc_ov_rate -= pipe_tc3_rate; + s->tc_ov = s->tc_ov_rate > subport_tc3_rate; + + if (s->tc_ov != tc3_ov) { + RTE_LOG(DEBUG, SCHED, + "Subport %u TC3 oversubscription is OFF (%.4lf >= %.4lf)\n", + subport_id, subport_tc3_rate, s->tc_ov_rate); + } +#endif + + /* Reset the pipe */ + memset(p, 0, sizeof(struct rte_sched_pipe)); + } + + if (deactivate) + return 0; + + /* Apply the new pipe configuration */ + p->profile = profile; + params = port->pipe_profiles + p->profile; + + /* Token Bucket (TB) */ + p->tb_time = port->time; + p->tb_credits = params->tb_size / 2; + + /* Traffic Classes (TCs) */ + p->tc_time = port->time + params->tc_period; + for (i = 0; i < RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE; i++) + p->tc_credits[i] = params->tc_credits_per_period[i]; + +#ifdef RTE_SCHED_SUBPORT_TC_OV + { + /* Subport TC3 oversubscription */ + double subport_tc3_rate = (double) s->tc_credits_per_period[3] + / (double) s->tc_period; + double pipe_tc3_rate = (double) params->tc_credits_per_period[3] + / (double) params->tc_period; + uint32_t tc3_ov = s->tc_ov; + + s->tc_ov_n += params->tc_ov_weight; + s->tc_ov_rate += pipe_tc3_rate; + s->tc_ov = s->tc_ov_rate > subport_tc3_rate; + + if (s->tc_ov != tc3_ov) { + RTE_LOG(DEBUG, SCHED, + "Subport %u TC3 oversubscription is ON (%.4lf < %.4lf)\n", + subport_id, subport_tc3_rate, s->tc_ov_rate); + } + p->tc_ov_period_id = s->tc_ov_period_id; + p->tc_ov_credits = s->tc_ov_wm; + } +#endif + + return 0; +} + +void +rte_sched_port_pkt_write(struct rte_mbuf *pkt, + uint32_t subport, uint32_t pipe, uint32_t traffic_class, + uint32_t queue, enum rte_meter_color color) +{ + struct rte_sched_port_hierarchy *sched + = (struct rte_sched_port_hierarchy *) &pkt->hash.sched; + + RTE_BUILD_BUG_ON(sizeof(*sched) > sizeof(pkt->hash.sched)); + + sched->color = (uint32_t) color; + sched->subport = subport; + sched->pipe = pipe; + sched->traffic_class = traffic_class; + sched->queue = queue; +} + +void +rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt, + uint32_t *subport, uint32_t *pipe, + uint32_t *traffic_class, uint32_t *queue) +{ + const struct rte_sched_port_hierarchy *sched + = (const struct rte_sched_port_hierarchy *) &pkt->hash.sched; + + *subport = sched->subport; + *pipe = sched->pipe; + *traffic_class = sched->traffic_class; + *queue = sched->queue; +} + +enum rte_meter_color +rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt) +{ + const struct rte_sched_port_hierarchy *sched + = (const struct rte_sched_port_hierarchy *) &pkt->hash.sched; + + return (enum rte_meter_color) sched->color; +} + +int +rte_sched_subport_read_stats(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_stats *stats, + uint32_t *tc_ov) +{ + struct rte_sched_subport *s; + + /* Check user parameters */ + if (port == NULL || subport_id >= port->n_subports_per_port || + stats == NULL || tc_ov == NULL) + return -1; + + s = port->subport + subport_id; + + /* Copy subport stats and clear */ + memcpy(stats, &s->stats, sizeof(struct rte_sched_subport_stats)); + memset(&s->stats, 0, sizeof(struct rte_sched_subport_stats)); + + /* Subport TC ovesubscription status */ + *tc_ov = s->tc_ov; + + return 0; +} + +int +rte_sched_queue_read_stats(struct rte_sched_port *port, + uint32_t queue_id, + struct rte_sched_queue_stats *stats, + uint16_t *qlen) +{ + struct rte_sched_queue *q; + struct rte_sched_queue_extra *qe; + + /* Check user parameters */ + if ((port == NULL) || + (queue_id >= rte_sched_port_queues_per_port(port)) || + (stats == NULL) || + (qlen == NULL)) { + return -1; + } + q = port->queue + queue_id; + qe = port->queue_extra + queue_id; + + /* Copy queue stats and clear */ + memcpy(stats, &qe->stats, sizeof(struct rte_sched_queue_stats)); + memset(&qe->stats, 0, sizeof(struct rte_sched_queue_stats)); + + /* Queue length */ + *qlen = q->qw - q->qr; + + return 0; +} + +static inline uint32_t +rte_sched_port_qindex(struct rte_sched_port *port, uint32_t subport, uint32_t pipe, uint32_t traffic_class, uint32_t queue) +{ + uint32_t result; + + result = subport * port->n_pipes_per_subport + pipe; + result = result * RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE + traffic_class; + result = result * RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS + queue; + + return result; +} + +#ifdef RTE_SCHED_DEBUG + +static inline int +rte_sched_port_queue_is_empty(struct rte_sched_port *port, uint32_t qindex) +{ + struct rte_sched_queue *queue = port->queue + qindex; + + return queue->qr == queue->qw; +} + +#endif /* RTE_SCHED_DEBUG */ + +#ifdef RTE_SCHED_COLLECT_STATS + +static inline void +rte_sched_port_update_subport_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt) +{ + struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port)); + uint32_t tc_index = (qindex >> 2) & 0x3; + uint32_t pkt_len = pkt->pkt_len; + + s->stats.n_pkts_tc[tc_index] += 1; + s->stats.n_bytes_tc[tc_index] += pkt_len; +} + +static inline void +rte_sched_port_update_subport_stats_on_drop(struct rte_sched_port *port, + uint32_t qindex, + struct rte_mbuf *pkt, uint32_t red) +{ + struct rte_sched_subport *s = port->subport + (qindex / rte_sched_port_queues_per_subport(port)); + uint32_t tc_index = (qindex >> 2) & 0x3; + uint32_t pkt_len = pkt->pkt_len; + + s->stats.n_pkts_tc_dropped[tc_index] += 1; + s->stats.n_bytes_tc_dropped[tc_index] += pkt_len; +#ifdef RTE_SCHED_RED + s->stats.n_pkts_red_dropped[tc_index] += red; +#endif +} + +static inline void +rte_sched_port_update_queue_stats(struct rte_sched_port *port, uint32_t qindex, struct rte_mbuf *pkt) +{ + struct rte_sched_queue_extra *qe = port->queue_extra + qindex; + uint32_t pkt_len = pkt->pkt_len; + + qe->stats.n_pkts += 1; + qe->stats.n_bytes += pkt_len; +} + +static inline void +rte_sched_port_update_queue_stats_on_drop(struct rte_sched_port *port, + uint32_t qindex, + struct rte_mbuf *pkt, uint32_t red) +{ + struct rte_sched_queue_extra *qe = port->queue_extra + qindex; + uint32_t pkt_len = pkt->pkt_len; + + qe->stats.n_pkts_dropped += 1; + qe->stats.n_bytes_dropped += pkt_len; +#ifdef RTE_SCHED_RED + qe->stats.n_pkts_red_dropped += red; +#endif +} + +#endif /* RTE_SCHED_COLLECT_STATS */ + +#ifdef RTE_SCHED_RED + +static inline int +rte_sched_port_red_drop(struct rte_sched_port *port, struct rte_mbuf *pkt, uint32_t qindex, uint16_t qlen) +{ + struct rte_sched_queue_extra *qe; + struct rte_red_config *red_cfg; + struct rte_red *red; + uint32_t tc_index; + enum rte_meter_color color; + + tc_index = (qindex >> 2) & 0x3; + color = rte_sched_port_pkt_read_color(pkt); + red_cfg = &port->red_config[tc_index][color]; + + if ((red_cfg->min_th | red_cfg->max_th) == 0) + return 0; + + qe = port->queue_extra + qindex; + red = &qe->red; + + return rte_red_enqueue(red_cfg, red, qlen, port->time); +} + +static inline void +rte_sched_port_set_queue_empty_timestamp(struct rte_sched_port *port, uint32_t qindex) +{ + struct rte_sched_queue_extra *qe = port->queue_extra + qindex; + struct rte_red *red = &qe->red; + + rte_red_mark_queue_empty(red, port->time); +} + +#else + +#define rte_sched_port_red_drop(port, pkt, qindex, qlen) 0 + +#define rte_sched_port_set_queue_empty_timestamp(port, qindex) + +#endif /* RTE_SCHED_RED */ + +#ifdef RTE_SCHED_DEBUG + +static inline void +debug_check_queue_slab(struct rte_sched_port *port, uint32_t bmp_pos, + uint64_t bmp_slab) +{ + uint64_t mask; + uint32_t i, panic; + + if (bmp_slab == 0) + rte_panic("Empty slab at position %u\n", bmp_pos); + + panic = 0; + for (i = 0, mask = 1; i < 64; i++, mask <<= 1) { + if (mask & bmp_slab) { + if (rte_sched_port_queue_is_empty(port, bmp_pos + i)) { + printf("Queue %u (slab offset %u) is empty\n", bmp_pos + i, i); + panic = 1; + } + } + } + + if (panic) + rte_panic("Empty queues in slab 0x%" PRIx64 "starting at position %u\n", + bmp_slab, bmp_pos); +} + +#endif /* RTE_SCHED_DEBUG */ + +static inline uint32_t +rte_sched_port_enqueue_qptrs_prefetch0(struct rte_sched_port *port, + struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; +#ifdef RTE_SCHED_COLLECT_STATS + struct rte_sched_queue_extra *qe; +#endif + uint32_t subport, pipe, traffic_class, queue, qindex; + + rte_sched_port_pkt_read_tree_path(pkt, &subport, &pipe, &traffic_class, &queue); + + qindex = rte_sched_port_qindex(port, subport, pipe, traffic_class, queue); + q = port->queue + qindex; + rte_prefetch0(q); +#ifdef RTE_SCHED_COLLECT_STATS + qe = port->queue_extra + qindex; + rte_prefetch0(qe); +#endif + + return qindex; +} + +static inline void +rte_sched_port_enqueue_qwa_prefetch0(struct rte_sched_port *port, + uint32_t qindex, struct rte_mbuf **qbase) +{ + struct rte_sched_queue *q; + struct rte_mbuf **q_qw; + uint16_t qsize; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + q_qw = qbase + (q->qw & (qsize - 1)); + + rte_prefetch0(q_qw); + rte_bitmap_prefetch0(port->bmp, qindex); +} + +static inline int +rte_sched_port_enqueue_qwa(struct rte_sched_port *port, uint32_t qindex, + struct rte_mbuf **qbase, struct rte_mbuf *pkt) +{ + struct rte_sched_queue *q; + uint16_t qsize; + uint16_t qlen; + + q = port->queue + qindex; + qsize = rte_sched_port_qsize(port, qindex); + qlen = q->qw - q->qr; + + /* Drop the packet (and update drop stats) when queue is full */ + if (unlikely(rte_sched_port_red_drop(port, pkt, qindex, qlen) || + (qlen >= qsize))) { + rte_pktmbuf_free(pkt); +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats_on_drop(port, qindex, pkt, + qlen < qsize); + rte_sched_port_update_queue_stats_on_drop(port, qindex, pkt, + qlen < qsize); +#endif + return 0; + } + + /* Enqueue packet */ + qbase[q->qw & (qsize - 1)] = pkt; + q->qw++; + + /* Activate queue in the port bitmap */ + rte_bitmap_set(port->bmp, qindex); + + /* Statistics */ +#ifdef RTE_SCHED_COLLECT_STATS + rte_sched_port_update_subport_stats(port, qindex, pkt); + rte_sched_port_update_queue_stats(port, qindex, pkt); +#endif + + return 1; +} + + +/* + * The enqueue function implements a 4-level pipeline with each stage + * processing two different packets. The purpose of using a pipeline + * is to hide the latency of prefetching the data structures. The + * naming convention is presented in the diagram below: + * + * p00 _______ p10 _______ p20 _______ p30 _______ + * ----->| |----->| |----->| |----->| |-----> + * | 0 | | 1 | | 2 | | 3 | + * ----->|_______|----->|_______|----->|_______|----->|_______|-----> + * p01 p11 p21 p31 + * + */ +int +rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, + uint32_t n_pkts) +{ + struct rte_mbuf *pkt00, *pkt01, *pkt10, *pkt11, *pkt20, *pkt21, + *pkt30, *pkt31, *pkt_last; + struct rte_mbuf **q00_base, **q01_base, **q10_base, **q11_base, + **q20_base, **q21_base, **q30_base, **q31_base, **q_last_base; + uint32_t q00, q01, q10, q11, q20, q21, q30, q31, q_last; + uint32_t r00, r01, r10, r11, r20, r21, r30, r31, r_last; + uint32_t result, i; + + result = 0; + + /* + * Less then 6 input packets available, which is not enough to + * feed the pipeline + */ + if (unlikely(n_pkts < 6)) { + struct rte_mbuf **q_base[5]; + uint32_t q[5]; + + /* Prefetch the mbuf structure of each packet */ + for (i = 0; i < n_pkts; i++) + rte_prefetch0(pkts[i]); + + /* Prefetch the queue structure for each queue */ + for (i = 0; i < n_pkts; i++) + q[i] = rte_sched_port_enqueue_qptrs_prefetch0(port, + pkts[i]); + + /* Prefetch the write pointer location of each queue */ + for (i = 0; i < n_pkts; i++) { + q_base[i] = rte_sched_port_qbase(port, q[i]); + rte_sched_port_enqueue_qwa_prefetch0(port, q[i], + q_base[i]); + } + + /* Write each packet to its queue */ + for (i = 0; i < n_pkts; i++) + result += rte_sched_port_enqueue_qwa(port, q[i], + q_base[i], pkts[i]); + + return result; + } + + /* Feed the first 3 stages of the pipeline (6 packets needed) */ + pkt20 = pkts[0]; + pkt21 = pkts[1]; + rte_prefetch0(pkt20); + rte_prefetch0(pkt21); + + pkt10 = pkts[2]; + pkt11 = pkts[3]; + rte_prefetch0(pkt10); + rte_prefetch0(pkt11); + + q20 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt20); + q21 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt21); + + pkt00 = pkts[4]; + pkt01 = pkts[5]; + rte_prefetch0(pkt00); + rte_prefetch0(pkt01); + + q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10); + q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11); + + q20_base = rte_sched_port_qbase(port, q20); + q21_base = rte_sched_port_qbase(port, q21); + rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base); + + /* Run the pipeline */ + for (i = 6; i < (n_pkts & (~1)); i += 2) { + /* Propagate stage inputs */ + pkt30 = pkt20; + pkt31 = pkt21; + pkt20 = pkt10; + pkt21 = pkt11; + pkt10 = pkt00; + pkt11 = pkt01; + q30 = q20; + q31 = q21; + q20 = q10; + q21 = q11; + q30_base = q20_base; + q31_base = q21_base; + + /* Stage 0: Get packets in */ + pkt00 = pkts[i]; + pkt01 = pkts[i + 1]; + rte_prefetch0(pkt00); + rte_prefetch0(pkt01); + + /* Stage 1: Prefetch queue structure storing queue pointers */ + q10 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt10); + q11 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt11); + + /* Stage 2: Prefetch queue write location */ + q20_base = rte_sched_port_qbase(port, q20); + q21_base = rte_sched_port_qbase(port, q21); + rte_sched_port_enqueue_qwa_prefetch0(port, q20, q20_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q21, q21_base); + + /* Stage 3: Write packet to queue and activate queue */ + r30 = rte_sched_port_enqueue_qwa(port, q30, q30_base, pkt30); + r31 = rte_sched_port_enqueue_qwa(port, q31, q31_base, pkt31); + result += r30 + r31; + } + + /* + * Drain the pipeline (exactly 6 packets). + * Handle the last packet in the case + * of an odd number of input packets. + */ + pkt_last = pkts[n_pkts - 1]; + rte_prefetch0(pkt_last); + + q00 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt00); + q01 = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt01); + + q10_base = rte_sched_port_qbase(port, q10); + q11_base = rte_sched_port_qbase(port, q11); + rte_sched_port_enqueue_qwa_prefetch0(port, q10, q10_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q11, q11_base); + + r20 = rte_sched_port_enqueue_qwa(port, q20, q20_base, pkt20); + r21 = rte_sched_port_enqueue_qwa(port, q21, q21_base, pkt21); + result += r20 + r21; + + q_last = rte_sched_port_enqueue_qptrs_prefetch0(port, pkt_last); + + q00_base = rte_sched_port_qbase(port, q00); + q01_base = rte_sched_port_qbase(port, q01); + rte_sched_port_enqueue_qwa_prefetch0(port, q00, q00_base); + rte_sched_port_enqueue_qwa_prefetch0(port, q01, q01_base); + + r10 = rte_sched_port_enqueue_qwa(port, q10, q10_base, pkt10); + r11 = rte_sched_port_enqueue_qwa(port, q11, q11_base, pkt11); + result += r10 + r11; + + q_last_base = rte_sched_port_qbase(port, q_last); + rte_sched_port_enqueue_qwa_prefetch0(port, q_last, q_last_base); + + r00 = rte_sched_port_enqueue_qwa(port, q00, q00_base, pkt00); + r01 = rte_sched_port_enqueue_qwa(port, q01, q01_base, pkt01); + result += r00 + r01; + + if (n_pkts & 1) { + r_last = rte_sched_port_enqueue_qwa(port, q_last, q_last_base, pkt_last); + result += r_last; + } + + return result; +} + +#ifndef RTE_SCHED_SUBPORT_TC_OV + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + subport->tc_time = port->time + subport->tc_period; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } +} + +#else + +static inline uint32_t +grinder_tc_ov_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + uint32_t tc_ov_consumption[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + uint32_t tc_ov_consumption_max; + uint32_t tc_ov_wm = subport->tc_ov_wm; + + if (subport->tc_ov == 0) + return subport->tc_ov_wm_max; + + tc_ov_consumption[0] = subport->tc_credits_per_period[0] - subport->tc_credits[0]; + tc_ov_consumption[1] = subport->tc_credits_per_period[1] - subport->tc_credits[1]; + tc_ov_consumption[2] = subport->tc_credits_per_period[2] - subport->tc_credits[2]; + tc_ov_consumption[3] = subport->tc_credits_per_period[3] - subport->tc_credits[3]; + + tc_ov_consumption_max = subport->tc_credits_per_period[3] - + (tc_ov_consumption[0] + tc_ov_consumption[1] + tc_ov_consumption[2]); + + if (tc_ov_consumption[3] > (tc_ov_consumption_max - port->mtu)) { + tc_ov_wm -= tc_ov_wm >> 7; + if (tc_ov_wm < subport->tc_ov_wm_min) + tc_ov_wm = subport->tc_ov_wm_min; + + return tc_ov_wm; + } + + tc_ov_wm += (tc_ov_wm >> 7) + 1; + if (tc_ov_wm > subport->tc_ov_wm_max) + tc_ov_wm = subport->tc_ov_wm_max; + + return tc_ov_wm; +} + +static inline void +grinder_credits_update(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *params = grinder->pipe_params; + uint64_t n_periods; + + /* Subport TB */ + n_periods = (port->time - subport->tb_time) / subport->tb_period; + subport->tb_credits += n_periods * subport->tb_credits_per_period; + subport->tb_credits = rte_sched_min_val_2_u32(subport->tb_credits, subport->tb_size); + subport->tb_time += n_periods * subport->tb_period; + + /* Pipe TB */ + n_periods = (port->time - pipe->tb_time) / params->tb_period; + pipe->tb_credits += n_periods * params->tb_credits_per_period; + pipe->tb_credits = rte_sched_min_val_2_u32(pipe->tb_credits, params->tb_size); + pipe->tb_time += n_periods * params->tb_period; + + /* Subport TCs */ + if (unlikely(port->time >= subport->tc_time)) { + subport->tc_ov_wm = grinder_tc_ov_credits_update(port, pos); + + subport->tc_credits[0] = subport->tc_credits_per_period[0]; + subport->tc_credits[1] = subport->tc_credits_per_period[1]; + subport->tc_credits[2] = subport->tc_credits_per_period[2]; + subport->tc_credits[3] = subport->tc_credits_per_period[3]; + + subport->tc_time = port->time + subport->tc_period; + subport->tc_ov_period_id++; + } + + /* Pipe TCs */ + if (unlikely(port->time >= pipe->tc_time)) { + pipe->tc_credits[0] = params->tc_credits_per_period[0]; + pipe->tc_credits[1] = params->tc_credits_per_period[1]; + pipe->tc_credits[2] = params->tc_credits_per_period[2]; + pipe->tc_credits[3] = params->tc_credits_per_period[3]; + pipe->tc_time = port->time + params->tc_period; + } + + /* Pipe TCs - Oversubscription */ + if (unlikely(pipe->tc_ov_period_id != subport->tc_ov_period_id)) { + pipe->tc_ov_credits = subport->tc_ov_wm * params->tc_ov_weight; + + pipe->tc_ov_period_id = subport->tc_ov_period_id; + } +} + +#endif /* RTE_SCHED_TS_CREDITS_UPDATE, RTE_SCHED_SUBPORT_TC_OV */ + + +#ifndef RTE_SCHED_SUBPORT_TC_OV + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint32_t subport_tb_credits = subport->tb_credits; + uint32_t subport_tc_credits = subport->tc_credits[tc_index]; + uint32_t pipe_tb_credits = pipe->tb_credits; + uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + int enough_credits; + + /* Check queue credits */ + enough_credits = (pkt_len <= subport_tb_credits) && + (pkt_len <= subport_tc_credits) && + (pkt_len <= pipe_tb_credits) && + (pkt_len <= pipe_tc_credits); + + if (!enough_credits) + return 0; + + /* Update port credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + + return 1; +} + +#else + +static inline int +grinder_credits_check(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_subport *subport = grinder->subport; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t tc_index = grinder->tc_index; + uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; + uint32_t subport_tb_credits = subport->tb_credits; + uint32_t subport_tc_credits = subport->tc_credits[tc_index]; + uint32_t pipe_tb_credits = pipe->tb_credits; + uint32_t pipe_tc_credits = pipe->tc_credits[tc_index]; + uint32_t pipe_tc_ov_mask1[] = {UINT32_MAX, UINT32_MAX, UINT32_MAX, pipe->tc_ov_credits}; + uint32_t pipe_tc_ov_mask2[] = {0, 0, 0, UINT32_MAX}; + uint32_t pipe_tc_ov_credits = pipe_tc_ov_mask1[tc_index]; + int enough_credits; + + /* Check pipe and subport credits */ + enough_credits = (pkt_len <= subport_tb_credits) && + (pkt_len <= subport_tc_credits) && + (pkt_len <= pipe_tb_credits) && + (pkt_len <= pipe_tc_credits) && + (pkt_len <= pipe_tc_ov_credits); + + if (!enough_credits) + return 0; + + /* Update pipe and subport credits */ + subport->tb_credits -= pkt_len; + subport->tc_credits[tc_index] -= pkt_len; + pipe->tb_credits -= pkt_len; + pipe->tc_credits[tc_index] -= pkt_len; + pipe->tc_ov_credits -= pipe_tc_ov_mask2[tc_index] & pkt_len; + + return 1; +} + +#endif /* RTE_SCHED_SUBPORT_TC_OV */ + + +static inline int +grinder_schedule(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_queue *queue = grinder->queue[grinder->qpos]; + struct rte_mbuf *pkt = grinder->pkt; + uint32_t pkt_len = pkt->pkt_len + port->frame_overhead; + + if (!grinder_credits_check(port, pos)) + return 0; + + /* Advance port time */ + port->time += pkt_len; + + /* Send packet */ + port->pkts_out[port->n_pkts_out++] = pkt; + queue->qr++; + grinder->wrr_tokens[grinder->qpos] += pkt_len * grinder->wrr_cost[grinder->qpos]; + if (queue->qr == queue->qw) { + uint32_t qindex = grinder->qindex[grinder->qpos]; + + rte_bitmap_clear(port->bmp, qindex); + grinder->qmask &= ~(1 << grinder->qpos); + grinder->wrr_mask[grinder->qpos] = 0; + rte_sched_port_set_queue_empty_timestamp(port, qindex); + } + + /* Reset pipe loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + grinder->productive = 1; + + return 1; +} + +#ifdef SCHED_VECTOR_SSE4 + +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + __m128i index = _mm_set1_epi32(base_pipe); + __m128i pipes = _mm_load_si128((__m128i *)port->grinder_base_bmp_pos); + __m128i res = _mm_cmpeq_epi32(pipes, index); + + pipes = _mm_load_si128((__m128i *)(port->grinder_base_bmp_pos + 4)); + pipes = _mm_cmpeq_epi32(pipes, index); + res = _mm_or_si128(res, pipes); + + if (_mm_testz_si128(res, res)) + return 0; + + return 1; +} + +#else + +static inline int +grinder_pipe_exists(struct rte_sched_port *port, uint32_t base_pipe) +{ + uint32_t i; + + for (i = 0; i < RTE_SCHED_PORT_N_GRINDERS; i++) { + if (port->grinder_base_bmp_pos[i] == base_pipe) + return 1; + } + + return 0; +} + +#endif /* RTE_SCHED_OPTIMIZATIONS */ + +static inline void +grinder_pcache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t bmp_pos, uint64_t bmp_slab) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint16_t w[4]; + + grinder->pcache_w = 0; + grinder->pcache_r = 0; + + w[0] = (uint16_t) bmp_slab; + w[1] = (uint16_t) (bmp_slab >> 16); + w[2] = (uint16_t) (bmp_slab >> 32); + w[3] = (uint16_t) (bmp_slab >> 48); + + grinder->pcache_qmask[grinder->pcache_w] = w[0]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos; + grinder->pcache_w += (w[0] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[1]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 16; + grinder->pcache_w += (w[1] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[2]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 32; + grinder->pcache_w += (w[2] != 0); + + grinder->pcache_qmask[grinder->pcache_w] = w[3]; + grinder->pcache_qindex[grinder->pcache_w] = bmp_pos + 48; + grinder->pcache_w += (w[3] != 0); +} + +static inline void +grinder_tccache_populate(struct rte_sched_port *port, uint32_t pos, uint32_t qindex, uint16_t qmask) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint8_t b[4]; + + grinder->tccache_w = 0; + grinder->tccache_r = 0; + + b[0] = (uint8_t) (qmask & 0xF); + b[1] = (uint8_t) ((qmask >> 4) & 0xF); + b[2] = (uint8_t) ((qmask >> 8) & 0xF); + b[3] = (uint8_t) ((qmask >> 12) & 0xF); + + grinder->tccache_qmask[grinder->tccache_w] = b[0]; + grinder->tccache_qindex[grinder->tccache_w] = qindex; + grinder->tccache_w += (b[0] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[1]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 4; + grinder->tccache_w += (b[1] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[2]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 8; + grinder->tccache_w += (b[2] != 0); + + grinder->tccache_qmask[grinder->tccache_w] = b[3]; + grinder->tccache_qindex[grinder->tccache_w] = qindex + 12; + grinder->tccache_w += (b[3] != 0); +} + +static inline int +grinder_next_tc(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_mbuf **qbase; + uint32_t qindex; + uint16_t qsize; + + if (grinder->tccache_r == grinder->tccache_w) + return 0; + + qindex = grinder->tccache_qindex[grinder->tccache_r]; + qbase = rte_sched_port_qbase(port, qindex); + qsize = rte_sched_port_qsize(port, qindex); + + grinder->tc_index = (qindex >> 2) & 0x3; + grinder->qmask = grinder->tccache_qmask[grinder->tccache_r]; + grinder->qsize = qsize; + + grinder->qindex[0] = qindex; + grinder->qindex[1] = qindex + 1; + grinder->qindex[2] = qindex + 2; + grinder->qindex[3] = qindex + 3; + + grinder->queue[0] = port->queue + qindex; + grinder->queue[1] = port->queue + qindex + 1; + grinder->queue[2] = port->queue + qindex + 2; + grinder->queue[3] = port->queue + qindex + 3; + + grinder->qbase[0] = qbase; + grinder->qbase[1] = qbase + qsize; + grinder->qbase[2] = qbase + 2 * qsize; + grinder->qbase[3] = qbase + 3 * qsize; + + grinder->tccache_r++; + return 1; +} + +static inline int +grinder_next_pipe(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint32_t pipe_qindex; + uint16_t pipe_qmask; + + if (grinder->pcache_r < grinder->pcache_w) { + pipe_qmask = grinder->pcache_qmask[grinder->pcache_r]; + pipe_qindex = grinder->pcache_qindex[grinder->pcache_r]; + grinder->pcache_r++; + } else { + uint64_t bmp_slab = 0; + uint32_t bmp_pos = 0; + + /* Get another non-empty pipe group */ + if (unlikely(rte_bitmap_scan(port->bmp, &bmp_pos, &bmp_slab) <= 0)) + return 0; + +#ifdef RTE_SCHED_DEBUG + debug_check_queue_slab(port, bmp_pos, bmp_slab); +#endif + + /* Return if pipe group already in one of the other grinders */ + port->grinder_base_bmp_pos[pos] = RTE_SCHED_BMP_POS_INVALID; + if (unlikely(grinder_pipe_exists(port, bmp_pos))) + return 0; + + port->grinder_base_bmp_pos[pos] = bmp_pos; + + /* Install new pipe group into grinder's pipe cache */ + grinder_pcache_populate(port, pos, bmp_pos, bmp_slab); + + pipe_qmask = grinder->pcache_qmask[0]; + pipe_qindex = grinder->pcache_qindex[0]; + grinder->pcache_r = 1; + } + + /* Install new pipe in the grinder */ + grinder->pindex = pipe_qindex >> 4; + grinder->subport = port->subport + (grinder->pindex / port->n_pipes_per_subport); + grinder->pipe = port->pipe + grinder->pindex; + grinder->pipe_params = NULL; /* to be set after the pipe structure is prefetched */ + grinder->productive = 0; + + grinder_tccache_populate(port, pos, pipe_qindex, pipe_qmask); + grinder_next_tc(port, pos); + + /* Check for pipe exhaustion */ + if (grinder->pindex == port->pipe_loop) { + port->pipe_exhaustion = 1; + port->pipe_loop = RTE_SCHED_PIPE_INVALID; + } + + return 1; +} + + +static inline void +grinder_wrr_load(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_pipe *pipe = grinder->pipe; + struct rte_sched_pipe_profile *pipe_params = grinder->pipe_params; + uint32_t tc_index = grinder->tc_index; + uint32_t qmask = grinder->qmask; + uint32_t qindex; + + qindex = tc_index * 4; + + grinder->wrr_tokens[0] = ((uint16_t) pipe->wrr_tokens[qindex]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[1] = ((uint16_t) pipe->wrr_tokens[qindex + 1]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[2] = ((uint16_t) pipe->wrr_tokens[qindex + 2]) << RTE_SCHED_WRR_SHIFT; + grinder->wrr_tokens[3] = ((uint16_t) pipe->wrr_tokens[qindex + 3]) << RTE_SCHED_WRR_SHIFT; + + grinder->wrr_mask[0] = (qmask & 0x1) * 0xFFFF; + grinder->wrr_mask[1] = ((qmask >> 1) & 0x1) * 0xFFFF; + grinder->wrr_mask[2] = ((qmask >> 2) & 0x1) * 0xFFFF; + grinder->wrr_mask[3] = ((qmask >> 3) & 0x1) * 0xFFFF; + + grinder->wrr_cost[0] = pipe_params->wrr_cost[qindex]; + grinder->wrr_cost[1] = pipe_params->wrr_cost[qindex + 1]; + grinder->wrr_cost[2] = pipe_params->wrr_cost[qindex + 2]; + grinder->wrr_cost[3] = pipe_params->wrr_cost[qindex + 3]; +} + +static inline void +grinder_wrr_store(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + struct rte_sched_pipe *pipe = grinder->pipe; + uint32_t tc_index = grinder->tc_index; + uint32_t qindex; + + qindex = tc_index * 4; + + pipe->wrr_tokens[qindex] = (grinder->wrr_tokens[0] & grinder->wrr_mask[0]) + >> RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[qindex + 1] = (grinder->wrr_tokens[1] & grinder->wrr_mask[1]) + >> RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[qindex + 2] = (grinder->wrr_tokens[2] & grinder->wrr_mask[2]) + >> RTE_SCHED_WRR_SHIFT; + pipe->wrr_tokens[qindex + 3] = (grinder->wrr_tokens[3] & grinder->wrr_mask[3]) + >> RTE_SCHED_WRR_SHIFT; +} + +static inline void +grinder_wrr(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint16_t wrr_tokens_min; + + grinder->wrr_tokens[0] |= ~grinder->wrr_mask[0]; + grinder->wrr_tokens[1] |= ~grinder->wrr_mask[1]; + grinder->wrr_tokens[2] |= ~grinder->wrr_mask[2]; + grinder->wrr_tokens[3] |= ~grinder->wrr_mask[3]; + + grinder->qpos = rte_min_pos_4_u16(grinder->wrr_tokens); + wrr_tokens_min = grinder->wrr_tokens[grinder->qpos]; + + grinder->wrr_tokens[0] -= wrr_tokens_min; + grinder->wrr_tokens[1] -= wrr_tokens_min; + grinder->wrr_tokens[2] -= wrr_tokens_min; + grinder->wrr_tokens[3] -= wrr_tokens_min; +} + + +#define grinder_evict(port, pos) + +static inline void +grinder_prefetch_pipe(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + + rte_prefetch0(grinder->pipe); + rte_prefetch0(grinder->queue[0]); +} + +static inline void +grinder_prefetch_tc_queue_arrays(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint16_t qsize, qr[4]; + + qsize = grinder->qsize; + qr[0] = grinder->queue[0]->qr & (qsize - 1); + qr[1] = grinder->queue[1]->qr & (qsize - 1); + qr[2] = grinder->queue[2]->qr & (qsize - 1); + qr[3] = grinder->queue[3]->qr & (qsize - 1); + + rte_prefetch0(grinder->qbase[0] + qr[0]); + rte_prefetch0(grinder->qbase[1] + qr[1]); + + grinder_wrr_load(port, pos); + grinder_wrr(port, pos); + + rte_prefetch0(grinder->qbase[2] + qr[2]); + rte_prefetch0(grinder->qbase[3] + qr[3]); +} + +static inline void +grinder_prefetch_mbuf(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + uint32_t qpos = grinder->qpos; + struct rte_mbuf **qbase = grinder->qbase[qpos]; + uint16_t qsize = grinder->qsize; + uint16_t qr = grinder->queue[qpos]->qr & (qsize - 1); + + grinder->pkt = qbase[qr]; + rte_prefetch0(grinder->pkt); + + if (unlikely((qr & 0x7) == 7)) { + uint16_t qr_next = (grinder->queue[qpos]->qr + 1) & (qsize - 1); + + rte_prefetch0(qbase + qr_next); + } +} + +static inline uint32_t +grinder_handle(struct rte_sched_port *port, uint32_t pos) +{ + struct rte_sched_grinder *grinder = port->grinder + pos; + + switch (grinder->state) { + case e_GRINDER_PREFETCH_PIPE: + { + if (grinder_next_pipe(port, pos)) { + grinder_prefetch_pipe(port, pos); + port->busy_grinders++; + + grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; + return 0; + } + + return 0; + } + + case e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS: + { + struct rte_sched_pipe *pipe = grinder->pipe; + + grinder->pipe_params = port->pipe_profiles + pipe->profile; + grinder_prefetch_tc_queue_arrays(port, pos); + grinder_credits_update(port, pos); + + grinder->state = e_GRINDER_PREFETCH_MBUF; + return 0; + } + + case e_GRINDER_PREFETCH_MBUF: + { + grinder_prefetch_mbuf(port, pos); + + grinder->state = e_GRINDER_READ_MBUF; + return 0; + } + + case e_GRINDER_READ_MBUF: + { + uint32_t result = 0; + + result = grinder_schedule(port, pos); + + /* Look for next packet within the same TC */ + if (result && grinder->qmask) { + grinder_wrr(port, pos); + grinder_prefetch_mbuf(port, pos); + + return 1; + } + grinder_wrr_store(port, pos); + + /* Look for another active TC within same pipe */ + if (grinder_next_tc(port, pos)) { + grinder_prefetch_tc_queue_arrays(port, pos); + + grinder->state = e_GRINDER_PREFETCH_MBUF; + return result; + } + + if (grinder->productive == 0 && + port->pipe_loop == RTE_SCHED_PIPE_INVALID) + port->pipe_loop = grinder->pindex; + + grinder_evict(port, pos); + + /* Look for another active pipe */ + if (grinder_next_pipe(port, pos)) { + grinder_prefetch_pipe(port, pos); + + grinder->state = e_GRINDER_PREFETCH_TC_QUEUE_ARRAYS; + return result; + } + + /* No active pipe found */ + port->busy_grinders--; + + grinder->state = e_GRINDER_PREFETCH_PIPE; + return result; + } + + default: + rte_panic("Algorithmic error (invalid state)\n"); + return 0; + } +} + +static inline void +rte_sched_port_time_resync(struct rte_sched_port *port) +{ + uint64_t cycles = rte_get_tsc_cycles(); + uint64_t cycles_diff = cycles - port->time_cpu_cycles; + uint64_t bytes_diff; + + /* Compute elapsed time in bytes */ + bytes_diff = rte_reciprocal_divide(cycles_diff << RTE_SCHED_TIME_SHIFT, + port->inv_cycles_per_byte); + + /* Advance port time */ + port->time_cpu_cycles = cycles; + port->time_cpu_bytes += bytes_diff; + if (port->time < port->time_cpu_bytes) + port->time = port->time_cpu_bytes; + + /* Reset pipe loop detection */ + port->pipe_loop = RTE_SCHED_PIPE_INVALID; +} + +static inline int +rte_sched_port_exceptions(struct rte_sched_port *port, int second_pass) +{ + int exceptions; + + /* Check if any exception flag is set */ + exceptions = (second_pass && port->busy_grinders == 0) || + (port->pipe_exhaustion == 1); + + /* Clear exception flags */ + port->pipe_exhaustion = 0; + + return exceptions; +} + +int +rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts) +{ + uint32_t i, count; + + port->pkts_out = pkts; + port->n_pkts_out = 0; + + rte_sched_port_time_resync(port); + + /* Take each queue in the grinder one step further */ + for (i = 0, count = 0; ; i++) { + count += grinder_handle(port, i & (RTE_SCHED_PORT_N_GRINDERS - 1)); + if ((count == n_pkts) || + rte_sched_port_exceptions(port, i >= RTE_SCHED_PORT_N_GRINDERS)) { + break; + } + } + + return count; +} diff --git a/lib/librte_sched/rte_sched.h b/lib/librte_sched/rte_sched.h new file mode 100644 index 00000000..e9c28172 --- /dev/null +++ b/lib/librte_sched/rte_sched.h @@ -0,0 +1,455 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_SCHED_H__ +#define __INCLUDE_RTE_SCHED_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Hierarchical Scheduler + * + * The hierarchical scheduler prioritizes the transmission of packets + * from different users and traffic classes according to the Service + * Level Agreements (SLAs) defined for the current network node. + * + * The scheduler supports thousands of packet queues grouped under a + * 5-level hierarchy: + * 1. Port: + * - Typical usage: output Ethernet port; + * - Multiple ports are scheduled in round robin order with + * equal priority; + * 2. Subport: + * - Typical usage: group of users; + * - Traffic shaping using the token bucket algorithm + * (one bucket per subport); + * - Upper limit enforced per traffic class at subport level; + * - Lower priority traffic classes able to reuse subport + * bandwidth currently unused by higher priority traffic + * classes of the same subport; + * - When any subport traffic class is oversubscribed + * (configuration time event), the usage of subport member + * pipes with high demand for thattraffic class pipes is + * truncated to a dynamically adjusted value with no + * impact to low demand pipes; + * 3. Pipe: + * - Typical usage: individual user/subscriber; + * - Traffic shaping using the token bucket algorithm + * (one bucket per pipe); + * 4. Traffic class: + * - Traffic classes of the same pipe handled in strict + * priority order; + * - Upper limit enforced per traffic class at the pipe level; + * - Lower priority traffic classes able to reuse pipe + * bandwidth currently unused by higher priority traffic + * classes of the same pipe; + * 5. Queue: + * - Typical usage: queue hosting packets from one or + * multiple connections of same traffic class belonging to + * the same user; + * - Weighted Round Robin (WRR) is used to service the + * queues within same pipe traffic class. + * + */ + +#include +#include +#include + +/** Random Early Detection (RED) */ +#ifdef RTE_SCHED_RED +#include "rte_red.h" +#endif + +/** Number of traffic classes per pipe (as well as subport). + * Cannot be changed. + */ +#define RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE 4 + +/** Number of queues per pipe traffic class. Cannot be changed. */ +#define RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS 4 + +/** Number of queues per pipe. */ +#define RTE_SCHED_QUEUES_PER_PIPE \ + (RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE * \ + RTE_SCHED_QUEUES_PER_TRAFFIC_CLASS) + +/** Maximum number of pipe profiles that can be defined per port. + * Compile-time configurable. + */ +#ifndef RTE_SCHED_PIPE_PROFILES_PER_PORT +#define RTE_SCHED_PIPE_PROFILES_PER_PORT 256 +#endif + +/* + * Ethernet framing overhead. Overhead fields per Ethernet frame: + * 1. Preamble: 7 bytes; + * 2. Start of Frame Delimiter (SFD): 1 byte; + * 3. Frame Check Sequence (FCS): 4 bytes; + * 4. Inter Frame Gap (IFG): 12 bytes. + * + * The FCS is considered overhead only if not included in the packet + * length (field pkt_len of struct rte_mbuf). + */ +#ifndef RTE_SCHED_FRAME_OVERHEAD_DEFAULT +#define RTE_SCHED_FRAME_OVERHEAD_DEFAULT 24 +#endif + +/* + * Subport configuration parameters. The period and credits_per_period + * parameters are measured in bytes, with one byte meaning the time + * duration associated with the transmission of one byte on the + * physical medium of the output port, with pipe or pipe traffic class + * rate (measured as percentage of output port rate) determined as + * credits_per_period divided by period. One credit represents one + * byte. + */ +struct rte_sched_subport_params { + /* Subport token bucket */ + uint32_t tb_rate; /**< Rate (measured in bytes per second) */ + uint32_t tb_size; /**< Size (measured in credits) */ + + /* Subport traffic classes */ + uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Traffic class rates (measured in bytes per second) */ + uint32_t tc_period; + /**< Enforcement period for rates (measured in milliseconds) */ +}; + +/** Subport statistics */ +struct rte_sched_subport_stats { + /* Packets */ + uint32_t n_pkts_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Number of packets successfully written */ + uint32_t n_pkts_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Number of packets dropped */ + + /* Bytes */ + uint32_t n_bytes_tc[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Number of bytes successfully written for each traffic class */ + uint32_t n_bytes_tc_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Number of bytes dropped for each traffic class */ + +#ifdef RTE_SCHED_RED + uint32_t n_pkts_red_dropped[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Number of packets dropped by red */ +#endif +}; + +/* + * Pipe configuration parameters. The period and credits_per_period + * parameters are measured in bytes, with one byte meaning the time + * duration associated with the transmission of one byte on the + * physical medium of the output port, with pipe or pipe traffic class + * rate (measured as percentage of output port rate) determined as + * credits_per_period divided by period. One credit represents one + * byte. + */ +struct rte_sched_pipe_params { + /* Pipe token bucket */ + uint32_t tb_rate; /**< Rate (measured in bytes per second) */ + uint32_t tb_size; /**< Size (measured in credits) */ + + /* Pipe traffic classes */ + uint32_t tc_rate[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Traffic class rates (measured in bytes per second) */ + uint32_t tc_period; + /**< Enforcement period (measured in milliseconds) */ +#ifdef RTE_SCHED_SUBPORT_TC_OV + uint8_t tc_ov_weight; /**< Weight Traffic class 3 oversubscription */ +#endif + + /* Pipe queues */ + uint8_t wrr_weights[RTE_SCHED_QUEUES_PER_PIPE]; /**< WRR weights */ +}; + +/** Queue statistics */ +struct rte_sched_queue_stats { + /* Packets */ + uint32_t n_pkts; /**< Packets successfully written */ + uint32_t n_pkts_dropped; /**< Packets dropped */ +#ifdef RTE_SCHED_RED + uint32_t n_pkts_red_dropped; /**< Packets dropped by RED */ +#endif + + /* Bytes */ + uint32_t n_bytes; /**< Bytes successfully written */ + uint32_t n_bytes_dropped; /**< Bytes dropped */ +}; + +/** Port configuration parameters. */ +struct rte_sched_port_params { + const char *name; /**< String to be associated */ + int socket; /**< CPU socket ID */ + uint32_t rate; /**< Output port rate + * (measured in bytes per second) */ + uint32_t mtu; /**< Maximum Ethernet frame size + * (measured in bytes). + * Should not include the framing overhead. */ + uint32_t frame_overhead; /**< Framing overhead per packet + * (measured in bytes) */ + uint32_t n_subports_per_port; /**< Number of subports */ + uint32_t n_pipes_per_subport; /**< Number of pipes per subport */ + uint16_t qsize[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE]; + /**< Packet queue size for each traffic class. + * All queues within the same pipe traffic class have the same + * size. Queues from different pipes serving the same traffic + * class have the same size. */ + struct rte_sched_pipe_params *pipe_profiles; + /**< Pipe profile table. + * Every pipe is configured using one of the profiles from this table. */ + uint32_t n_pipe_profiles; /**< Profiles in the pipe profile table */ +#ifdef RTE_SCHED_RED + struct rte_red_params red_params[RTE_SCHED_TRAFFIC_CLASSES_PER_PIPE][e_RTE_METER_COLORS]; /**< RED parameters */ +#endif +}; + +/* + * Configuration + * + ***/ + +/** + * Hierarchical scheduler port configuration + * + * @param params + * Port scheduler configuration parameter structure + * @return + * Handle to port scheduler instance upon success or NULL otherwise. + */ +struct rte_sched_port * +rte_sched_port_config(struct rte_sched_port_params *params); + +/** + * Hierarchical scheduler port free + * + * @param port + * Handle to port scheduler instance + */ +void +rte_sched_port_free(struct rte_sched_port *port); + +/** + * Hierarchical scheduler subport configuration + * + * @param port + * Handle to port scheduler instance + * @param subport_id + * Subport ID + * @param params + * Subport configuration parameters + * @return + * 0 upon success, error code otherwise + */ +int +rte_sched_subport_config(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_params *params); + +/** + * Hierarchical scheduler pipe configuration + * + * @param port + * Handle to port scheduler instance + * @param subport_id + * Subport ID + * @param pipe_id + * Pipe ID within subport + * @param pipe_profile + * ID of port-level pre-configured pipe profile + * @return + * 0 upon success, error code otherwise + */ +int +rte_sched_pipe_config(struct rte_sched_port *port, + uint32_t subport_id, + uint32_t pipe_id, + int32_t pipe_profile); + +/** + * Hierarchical scheduler memory footprint size per port + * + * @param params + * Port scheduler configuration parameter structure + * @return + * Memory footprint size in bytes upon success, 0 otherwise + */ +uint32_t +rte_sched_port_get_memory_footprint(struct rte_sched_port_params *params); + +/* + * Statistics + * + ***/ + +/** + * Hierarchical scheduler subport statistics read + * + * @param port + * Handle to port scheduler instance + * @param subport_id + * Subport ID + * @param stats + * Pointer to pre-allocated subport statistics structure where the statistics + * counters should be stored + * @param tc_ov + * Pointer to pre-allocated 4-entry array where the oversubscription status for + * each of the 4 subport traffic classes should be stored. + * @return + * 0 upon success, error code otherwise + */ +int +rte_sched_subport_read_stats(struct rte_sched_port *port, + uint32_t subport_id, + struct rte_sched_subport_stats *stats, + uint32_t *tc_ov); + +/** + * Hierarchical scheduler queue statistics read + * + * @param port + * Handle to port scheduler instance + * @param queue_id + * Queue ID within port scheduler + * @param stats + * Pointer to pre-allocated subport statistics structure where the statistics + * counters should be stored + * @param qlen + * Pointer to pre-allocated variable where the current queue length + * should be stored. + * @return + * 0 upon success, error code otherwise + */ +int +rte_sched_queue_read_stats(struct rte_sched_port *port, + uint32_t queue_id, + struct rte_sched_queue_stats *stats, + uint16_t *qlen); + +/** + * Scheduler hierarchy path write to packet descriptor. Typically + * called by the packet classification stage. + * + * @param pkt + * Packet descriptor handle + * @param subport + * Subport ID + * @param pipe + * Pipe ID within subport + * @param traffic_class + * Traffic class ID within pipe (0 .. 3) + * @param queue + * Queue ID within pipe traffic class (0 .. 3) + * @param color + * Packet color set + */ +void +rte_sched_port_pkt_write(struct rte_mbuf *pkt, + uint32_t subport, uint32_t pipe, uint32_t traffic_class, + uint32_t queue, enum rte_meter_color color); + +/** + * Scheduler hierarchy path read from packet descriptor (struct + * rte_mbuf). Typically called as part of the hierarchical scheduler + * enqueue operation. The subport, pipe, traffic class and queue + * parameters need to be pre-allocated by the caller. + * + * @param pkt + * Packet descriptor handle + * @param subport + * Subport ID + * @param pipe + * Pipe ID within subport + * @param traffic_class + * Traffic class ID within pipe (0 .. 3) + * @param queue + * Queue ID within pipe traffic class (0 .. 3) + * + */ +void +rte_sched_port_pkt_read_tree_path(const struct rte_mbuf *pkt, + uint32_t *subport, uint32_t *pipe, + uint32_t *traffic_class, uint32_t *queue); + +enum rte_meter_color +rte_sched_port_pkt_read_color(const struct rte_mbuf *pkt); + +/** + * Hierarchical scheduler port enqueue. Writes up to n_pkts to port + * scheduler and returns the number of packets actually written. For + * each packet, the port scheduler queue to write the packet to is + * identified by reading the hierarchy path from the packet + * descriptor; if the queue is full or congested and the packet is not + * written to the queue, then the packet is automatically dropped + * without any action required from the caller. + * + * @param port + * Handle to port scheduler instance + * @param pkts + * Array storing the packet descriptor handles + * @param n_pkts + * Number of packets to enqueue from the pkts array into the port scheduler + * @return + * Number of packets successfully enqueued + */ +int +rte_sched_port_enqueue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts); + +/** + * Hierarchical scheduler port dequeue. Reads up to n_pkts from the + * port scheduler and stores them in the pkts array and returns the + * number of packets actually read. The pkts array needs to be + * pre-allocated by the caller with at least n_pkts entries. + * + * @param port + * Handle to port scheduler instance + * @param pkts + * Pre-allocated packet descriptor array where the packets dequeued + * from the port + * scheduler should be stored + * @param n_pkts + * Number of packets to dequeue from the port scheduler + * @return + * Number of packets successfully dequeued and placed in the pkts array + */ +int +rte_sched_port_dequeue(struct rte_sched_port *port, struct rte_mbuf **pkts, uint32_t n_pkts); + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_SCHED_H__ */ diff --git a/lib/librte_sched/rte_sched_common.h b/lib/librte_sched/rte_sched_common.h new file mode 100644 index 00000000..8920adec --- /dev/null +++ b/lib/librte_sched/rte_sched_common.h @@ -0,0 +1,129 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_SCHED_COMMON_H__ +#define __INCLUDE_RTE_SCHED_COMMON_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#define __rte_aligned_16 __attribute__((__aligned__(16))) + +static inline uint32_t +rte_sched_min_val_2_u32(uint32_t x, uint32_t y) +{ + return (x < y)? x : y; +} + +#if 0 +static inline uint32_t +rte_min_pos_4_u16(uint16_t *x) +{ + uint32_t pos0, pos1; + + pos0 = (x[0] <= x[1])? 0 : 1; + pos1 = (x[2] <= x[3])? 2 : 3; + + return (x[pos0] <= x[pos1])? pos0 : pos1; +} + +#else + +/* simplified version to remove branches with CMOV instruction */ +static inline uint32_t +rte_min_pos_4_u16(uint16_t *x) +{ + uint32_t pos0 = 0; + uint32_t pos1 = 2; + + if (x[1] <= x[0]) pos0 = 1; + if (x[3] <= x[2]) pos1 = 3; + if (x[pos1] <= x[pos0]) pos0 = pos1; + + return pos0; +} + +#endif + +/* + * Compute the Greatest Common Divisor (GCD) of two numbers. + * This implementation uses Euclid's algorithm: + * gcd(a, 0) = a + * gcd(a, b) = gcd(b, a mod b) + * + */ +static inline uint32_t +rte_get_gcd(uint32_t a, uint32_t b) +{ + uint32_t c; + + if (a == 0) + return b; + if (b == 0) + return a; + + if (a < b) { + c = a; + a = b; + b = c; + } + + while (b != 0) { + c = a % b; + a = b; + b = c; + } + + return a; +} + +/* + * Compute the Lowest Common Denominator (LCD) of two numbers. + * This implementation computes GCD first: + * LCD(a, b) = (a * b) / GCD(a, b) + * + */ +static inline uint32_t +rte_get_lcd(uint32_t a, uint32_t b) +{ + return (a * b) / rte_get_gcd(a, b); +} + +#ifdef __cplusplus +} +#endif + +#endif /* __INCLUDE_RTE_SCHED_COMMON_H__ */ diff --git a/lib/librte_sched/rte_sched_version.map b/lib/librte_sched/rte_sched_version.map new file mode 100644 index 00000000..3aa159ab --- /dev/null +++ b/lib/librte_sched/rte_sched_version.map @@ -0,0 +1,31 @@ +DPDK_2.0 { + global: + + rte_approx; + rte_red_config_init; + rte_red_log2_1_minus_Wq; + rte_red_pow2_frac_inv; + rte_red_rand_seed; + rte_red_rand_val; + rte_red_rt_data_init; + rte_sched_pipe_config; + rte_sched_port_config; + rte_sched_port_dequeue; + rte_sched_port_enqueue; + rte_sched_port_free; + rte_sched_port_get_memory_footprint; + rte_sched_queue_read_stats; + rte_sched_subport_config; + rte_sched_subport_read_stats; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_sched_port_pkt_write; + rte_sched_port_pkt_read_tree_path; + rte_sched_port_pkt_read_color; + +} DPDK_2.0; diff --git a/lib/librte_table/Makefile b/lib/librte_table/Makefile new file mode 100644 index 00000000..7f02af3c --- /dev/null +++ b/lib/librte_table/Makefile @@ -0,0 +1,85 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# library name +# +LIB = librte_table.a + +CFLAGS += -O3 +CFLAGS += $(WERROR_FLAGS) + +EXPORT_MAP := rte_table_version.map + +LIBABIVER := 2 + +# +# all source are stored in SRCS-y +# +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_lpm_ipv6.c +ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_acl.c +endif +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key8.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key16.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_key32.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_ext.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_hash_lru.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_array.c +SRCS-$(CONFIG_RTE_LIBRTE_TABLE) += rte_table_stub.c + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_lpm.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_lpm_ipv6.h +ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_acl.h +endif +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_hash.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_lru.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_array.h +SYMLINK-$(CONFIG_RTE_LIBRTE_TABLE)-include += rte_table_stub.h + +# this lib depends upon: +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) := lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_mempool +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_port +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_lpm +ifeq ($(CONFIG_RTE_LIBRTE_ACL),y) +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_acl +endif +DEPDIRS-$(CONFIG_RTE_LIBRTE_TABLE) += lib/librte_hash + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_table/rte_lru.h b/lib/librte_table/rte_lru.h new file mode 100644 index 00000000..e87e062d --- /dev/null +++ b/lib/librte_table/rte_lru.h @@ -0,0 +1,213 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_LRU_H__ +#define __INCLUDE_RTE_LRU_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#ifdef __INTEL_COMPILER +#define GCC_VERSION (0) +#else +#define GCC_VERSION (__GNUC__ * 10000+__GNUC_MINOR__*100 + __GNUC_PATCHLEVEL__) +#endif + +#ifndef RTE_TABLE_HASH_LRU_STRATEGY +#ifdef __SSE4_2__ +#define RTE_TABLE_HASH_LRU_STRATEGY 2 +#else /* if no SSE, use simple scalar version */ +#define RTE_TABLE_HASH_LRU_STRATEGY 1 +#endif +#endif + +#ifndef RTE_ARCH_X86_64 +#undef RTE_TABLE_HASH_LRU_STRATEGY +#define RTE_TABLE_HASH_LRU_STRATEGY 1 +#endif + +#if (RTE_TABLE_HASH_LRU_STRATEGY < 0) || (RTE_TABLE_HASH_LRU_STRATEGY > 3) +#error Invalid value for RTE_TABLE_HASH_LRU_STRATEGY +#endif + +#if RTE_TABLE_HASH_LRU_STRATEGY == 0 + +#define lru_init(bucket) \ +do \ + bucket = bucket; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + bucket = bucket; \ + mru_val = mru_val; \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 1 + +#define lru_init(bucket) \ +do \ + bucket->lru_list = 0x0000000100020003LLU; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + uint64_t x, pos, x0, x1, x2, mask; \ + \ + x = bucket->lru_list; \ + \ + pos = 4; \ + if ((x >> 48) == ((uint64_t) mru_val)) \ + pos = 3; \ + \ + if (((x >> 32) & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 2; \ + \ + if (((x >> 16) & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 1; \ + \ + if ((x & 0xFFFFLLU) == ((uint64_t) mru_val)) \ + pos = 0; \ + \ + \ + pos <<= 4; \ + mask = (~0LLU) << pos; \ + x0 = x & (~mask); \ + x1 = (x >> 16) & mask; \ + x2 = (x << (48 - pos)) & (0xFFFFLLU << 48); \ + x = x0 | x1 | x2; \ + \ + if (pos != 64) \ + bucket->lru_list = x; \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 2 + +#if GCC_VERSION > 40306 +#include +#else +#include +#include +#include +#endif + +#define lru_init(bucket) \ +do \ + bucket->lru_list = 0x0000000100020003LLU; \ +while (0) + +#define lru_pos(bucket) (bucket->lru_list & 0xFFFFLLU) + +#define lru_update(bucket, mru_val) \ +do { \ + /* set up the masks for all possible shuffles, depends on pos */\ + static uint64_t masks[10] = { \ + /* Shuffle order; Make Zero (see _mm_shuffle_epi8 manual) */\ + 0x0100070605040302, 0x8080808080808080, \ + 0x0302070605040100, 0x8080808080808080, \ + 0x0504070603020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080, \ + 0x0706050403020100, 0x8080808080808080}; \ + /* load up one register with repeats of mru-val */ \ + uint64_t mru2 = mru_val; \ + uint64_t mru3 = mru2 | (mru2 << 16); \ + uint64_t lru = bucket->lru_list; \ + /* XOR to cause the word we're looking for to go to zero */ \ + uint64_t mru = lru ^ ((mru3 << 32) | mru3); \ + __m128i c = _mm_cvtsi64_si128(mru); \ + __m128i b = _mm_cvtsi64_si128(lru); \ + /* Find the minimum value (first zero word, if it's in there) */\ + __m128i d = _mm_minpos_epu16(c); \ + /* Second word is the index to found word (first word is the value) */\ + unsigned pos = _mm_extract_epi16(d, 1); \ + /* move the recently used location to top of list */ \ + __m128i k = _mm_shuffle_epi8(b, *((__m128i *) &masks[2 * pos]));\ + /* Finally, update the original list with the reordered data */ \ + bucket->lru_list = _mm_extract_epi64(k, 0); \ + /* Phwew! */ \ +} while (0) + +#elif RTE_TABLE_HASH_LRU_STRATEGY == 3 + +#if GCC_VERSION > 40306 +#include +#else +#include +#include +#include +#endif + +#define lru_init(bucket) \ +do \ + bucket->lru_list = ~0LLU; \ +while (0) + + +static inline int +f_lru_pos(uint64_t lru_list) +{ + __m128i lst = _mm_set_epi64x((uint64_t)-1, lru_list); + __m128i min = _mm_minpos_epu16(lst); + return _mm_extract_epi16(min, 1); +} +#define lru_pos(bucket) f_lru_pos(bucket->lru_list) + +#define lru_update(bucket, mru_val) \ +do { \ + const uint64_t orvals[] = {0xFFFFLLU, 0xFFFFLLU << 16, \ + 0xFFFFLLU << 32, 0xFFFFLLU << 48, 0LLU}; \ + const uint64_t decs[] = {0x1000100010001LLU, 0}; \ + __m128i lru = _mm_cvtsi64_si128(bucket->lru_list); \ + __m128i vdec = _mm_cvtsi64_si128(decs[mru_val>>2]); \ + lru = _mm_subs_epu16(lru, vdec); \ + bucket->lru_list = _mm_extract_epi64(lru, 0) | orvals[mru_val]; \ +} while (0) + +#else + +#error "Incorrect value for RTE_TABLE_HASH_LRU_STRATEGY" + +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table.h b/lib/librte_table/rte_table.h new file mode 100644 index 00000000..d3446a56 --- /dev/null +++ b/lib/librte_table/rte_table.h @@ -0,0 +1,301 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_H__ +#define __INCLUDE_RTE_TABLE_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table + * + * This tool is part of the DPDK Packet Framework tool suite and provides + * a standard interface to implement different types of lookup tables for data + * plane processing. + * + * Virtually any search algorithm that can uniquely associate data to a lookup + * key can be fitted under this lookup table abstraction. For the flow table + * use-case, the lookup key is an n-tuple of packet fields that uniquely + * identifies a traffic flow, while data represents actions and action + * meta-data associated with the same traffic flow. + * + ***/ + +#include +#include + +struct rte_mbuf; + +/** Lookup table statistics */ +struct rte_table_stats { + uint64_t n_pkts_in; + uint64_t n_pkts_lookup_miss; +}; + +/** + * Lookup table create + * + * @param params + * Parameters for lookup table creation. The underlying data structure is + * different for each lookup table type. + * @param socket_id + * CPU socket ID (e.g. for memory allocation purpose) + * @param entry_size + * Data size of each lookup table entry (measured in bytes) + * @return + * Handle to lookup table instance + */ +typedef void* (*rte_table_op_create)(void *params, int socket_id, + uint32_t entry_size); + +/** + * Lookup table free + * + * @param table + * Handle to lookup table instance + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_free)(void *table); + +/** + * Lookup table entry add + * + * @param table + * Handle to lookup table instance + * @param key + * Lookup key + * @param entry + * Data to be associated with the current key. This parameter has to point to + * a valid memory buffer where the first entry_size bytes (table create + * parameter) are populated with the data. + * @param key_found + * After successful invocation, *key_found is set to a value different than 0 + * if the current key is already present in the table and to 0 if not. This + * pointer has to be set to a valid memory location before the table entry add + * function is called. + * @param entry_ptr + * After successful invocation, *entry_ptr stores the handle to the table + * entry containing the data associated with the current key. This handle can + * be used to perform further read-write accesses to this entry. This handle + * is valid until the key is deleted from the table or the same key is + * re-added to the table, typically to associate it with different data. This + * pointer has to be set to a valid memory location before the function is + * called. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_entry_add)( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr); + +/** + * Lookup table entry delete + * + * @param table + * Handle to lookup table instance + * @param key + * Lookup key + * @param key_found + * After successful invocation, *key_found is set to a value different than 0 + * if the current key was present in the table before the delete operation + * was performed and to 0 if not. This pointer has to be set to a valid + * memory location before the table entry delete function is called. + * @param entry + * After successful invocation, if the key is found in the table (*key found + * is different than 0 after function call is completed) and entry points to + * a valid buffer (entry is set to a value different than NULL before the + * function is called), then the first entry_size bytes (table create + * parameter) in *entry store a copy of table entry that contained the data + * associated with the current key before the key was deleted. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_entry_delete)( + void *table, + void *key, + int *key_found, + void *entry); + +/** + * Lookup table entry add bulk + * + * @param table + * Handle to lookup table instance + * @param key + * Array containing lookup keys + * @param entries + * Array containing data to be associated with each key. Every item in the + * array has to point to a valid memory buffer where the first entry_size + * bytes (table create parameter) are populated with the data. + * @param n_keys + * Number of keys to add + * @param key_found + * After successful invocation, key_found for every item in the array is set + * to a value different than 0 if the current key is already present in the + * table and to 0 if not. This pointer has to be set to a valid memory + * location before the table entry add function is called. + * @param entries_ptr + * After successful invocation, array *entries_ptr stores the handle to the + * table entry containing the data associated with every key. This handle can + * be used to perform further read-write accesses to this entry. This handle + * is valid until the key is deleted from the table or the same key is + * re-added to the table, typically to associate it with different data. This + * pointer has to be set to a valid memory location before the function is + * called. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_entry_add_bulk)( + void *table, + void **keys, + void **entries, + uint32_t n_keys, + int *key_found, + void **entries_ptr); + +/** + * Lookup table entry delete bulk + * + * @param table + * Handle to lookup table instance + * @param key + * Array containing lookup keys + * @param n_keys + * Number of keys to delete + * @param key_found + * After successful invocation, key_found for every item in the array is set + * to a value different than 0if the current key was present in the table + * before the delete operation was performed and to 0 if not. This pointer + * has to be set to a valid memory location before the table entry delete + * function is called. + * @param entries + * If entries pointer is NULL, this pointer is ignored for every entry found. + * Else, after successful invocation, if specific key is found in the table + * (key_found is different than 0 for this item after function call is + * completed) and item of entry array points to a valid buffer (entry is set + * to a value different than NULL before the function is called), then the + * first entry_size bytes (table create parameter) in *entry store a copy of + * table entry that contained the data associated with the current key before + * the key was deleted. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_entry_delete_bulk)( + void *table, + void **keys, + uint32_t n_keys, + int *key_found, + void **entries); + +/** + * Lookup table lookup + * + * @param table + * Handle to lookup table instance + * @param pkts + * Burst of input packets specified as array of up to 64 pointers to struct + * rte_mbuf + * @param pkts_mask + * 64-bit bitmask specifying which packets in the input burst are valid. When + * pkts_mask bit n is set, then element n of pkts array is pointing to a + * valid packet. Otherwise, element n of pkts array does not point to a valid + * packet, therefore it will not be accessed. + * @param lookup_hit_mask + * Once the table lookup operation is completed, this 64-bit bitmask + * specifies which of the valid packets in the input burst resulted in lookup + * hit. For each valid input packet (pkts_mask bit n is set), the following + * are true on lookup hit: lookup_hit_mask bit n is set, element n of entries + * array is valid and it points to the lookup table entry that was hit. For + * each valid input packet (pkts_mask bit n is set), the following are true + * on lookup miss: lookup_hit_mask bit n is not set and element n of entries + * array is not valid. + * @param entries + * Once the table lookup operation is completed, this array provides the + * lookup table entries that were hit, as described above. It is required + * that this array is always pre-allocated by the caller of this function + * with exactly 64 elements. The implementation is allowed to speculatively + * modify the elements of this array, so elements marked as invalid in + * lookup_hit_mask once the table lookup operation is completed might have + * been modified by this function. + * @return + * 0 on success, error code otherwise + */ +typedef int (*rte_table_op_lookup)( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries); + +/** + * Lookup table stats read + * + * @param table + * Handle to lookup table instance + * @param stats + * Handle to table stats struct to copy data + * @param clear + * Flag indicating that stats should be cleared after read + * + * @return + * Error code or 0 on success. + */ +typedef int (*rte_table_op_stats_read)( + void *table, + struct rte_table_stats *stats, + int clear); + +/** Lookup table interface defining the lookup table operation */ +struct rte_table_ops { + rte_table_op_create f_create; /**< Create */ + rte_table_op_free f_free; /**< Free */ + rte_table_op_entry_add f_add; /**< Entry add */ + rte_table_op_entry_delete f_delete; /**< Entry delete */ + rte_table_op_entry_add_bulk f_add_bulk; /**< Add entry bulk */ + rte_table_op_entry_delete_bulk f_delete_bulk; /**< Delete entry bulk */ + rte_table_op_lookup f_lookup; /**< Lookup */ + rte_table_op_stats_read f_stats; /**< Stats */ +}; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_acl.c b/lib/librte_table/rte_table_acl.c new file mode 100644 index 00000000..c1eb8488 --- /dev/null +++ b/lib/librte_table/rte_table_acl.c @@ -0,0 +1,835 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_acl.h" +#include + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_ACL_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_ACL_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_table_acl { + struct rte_table_stats stats; + + /* Low-level ACL table */ + char name[2][RTE_ACL_NAMESIZE]; + struct rte_acl_param acl_params; /* for creating low level acl table */ + struct rte_acl_config cfg; /* Holds the field definitions (metadata) */ + struct rte_acl_ctx *ctx; + uint32_t name_id; + + /* Input parameters */ + uint32_t n_rules; + uint32_t entry_size; + + /* Internal tables */ + uint8_t *action_table; + struct rte_acl_rule **acl_rule_list; /* Array of pointers to rules */ + uint8_t *acl_rule_memory; /* Memory to store the rules */ + + /* Memory to store the action table and stack of free entries */ + uint8_t memory[0] __rte_cache_aligned; +}; + + +static void * +rte_table_acl_create( + void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_table_acl_params *p = (struct rte_table_acl_params *) params; + struct rte_table_acl *acl; + uint32_t action_table_size, acl_rule_list_size, acl_rule_memory_size; + uint32_t total_size; + + RTE_BUILD_BUG_ON(((sizeof(struct rte_table_acl) % RTE_CACHE_LINE_SIZE) + != 0)); + + /* Check input parameters */ + if (p == NULL) { + RTE_LOG(ERR, TABLE, "%s: Invalid value for params\n", __func__); + return NULL; + } + if (p->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: Invalid value for name\n", __func__); + return NULL; + } + if (p->n_rules == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid value for n_rules\n", + __func__); + return NULL; + } + if ((p->n_rule_fields == 0) || + (p->n_rule_fields > RTE_ACL_MAX_FIELDS)) { + RTE_LOG(ERR, TABLE, "%s: Invalid value for n_rule_fields\n", + __func__); + return NULL; + } + + entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t)); + + /* Memory allocation */ + action_table_size = RTE_CACHE_LINE_ROUNDUP(p->n_rules * entry_size); + acl_rule_list_size = + RTE_CACHE_LINE_ROUNDUP(p->n_rules * sizeof(struct rte_acl_rule *)); + acl_rule_memory_size = RTE_CACHE_LINE_ROUNDUP(p->n_rules * + RTE_ACL_RULE_SZ(p->n_rule_fields)); + total_size = sizeof(struct rte_table_acl) + action_table_size + + acl_rule_list_size + acl_rule_memory_size; + + acl = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, + socket_id); + if (acl == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for ACL table\n", + __func__, total_size); + return NULL; + } + + acl->action_table = &acl->memory[0]; + acl->acl_rule_list = + (struct rte_acl_rule **) &acl->memory[action_table_size]; + acl->acl_rule_memory = (uint8_t *) + &acl->memory[action_table_size + acl_rule_list_size]; + + /* Initialization of internal fields */ + snprintf(acl->name[0], RTE_ACL_NAMESIZE, "%s_a", p->name); + snprintf(acl->name[1], RTE_ACL_NAMESIZE, "%s_b", p->name); + acl->name_id = 1; + + acl->acl_params.name = acl->name[acl->name_id]; + acl->acl_params.socket_id = socket_id; + acl->acl_params.rule_size = RTE_ACL_RULE_SZ(p->n_rule_fields); + acl->acl_params.max_rule_num = p->n_rules; + + acl->cfg.num_categories = 1; + acl->cfg.num_fields = p->n_rule_fields; + memcpy(&acl->cfg.defs[0], &p->field_format[0], + p->n_rule_fields * sizeof(struct rte_acl_field_def)); + + acl->ctx = NULL; + + acl->n_rules = p->n_rules; + acl->entry_size = entry_size; + + return acl; +} + +static int +rte_table_acl_free(void *table) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free previously allocated resources */ + if (acl->ctx != NULL) + rte_acl_free(acl->ctx); + + rte_free(acl); + + return 0; +} + +RTE_ACL_RULE_DEF(rte_pipeline_acl_rule, RTE_ACL_MAX_FIELDS); + +static int +rte_table_acl_build(struct rte_table_acl *acl, struct rte_acl_ctx **acl_ctx) +{ + struct rte_acl_ctx *ctx = NULL; + uint32_t n_rules, i; + int status; + + /* Create low level ACL table */ + ctx = rte_acl_create(&acl->acl_params); + if (ctx == NULL) { + RTE_LOG(ERR, TABLE, "%s: Cannot create low level ACL table\n", + __func__); + return -1; + } + + /* Add rules to low level ACL table */ + n_rules = 0; + for (i = 1; i < acl->n_rules; i++) { + if (acl->acl_rule_list[i] != NULL) { + status = rte_acl_add_rules(ctx, acl->acl_rule_list[i], + 1); + if (status != 0) { + RTE_LOG(ERR, TABLE, + "%s: Cannot add rule to low level ACL table\n", + __func__); + rte_acl_free(ctx); + return -1; + } + + n_rules++; + } + } + + if (n_rules == 0) { + rte_acl_free(ctx); + *acl_ctx = NULL; + return 0; + } + + /* Build low level ACl table */ + status = rte_acl_build(ctx, &acl->cfg); + if (status != 0) { + RTE_LOG(ERR, TABLE, + "%s: Cannot build the low level ACL table\n", + __func__); + rte_acl_free(ctx); + return -1; + } + + rte_acl_dump(ctx); + + *acl_ctx = ctx; + return 0; +} + +static int +rte_table_acl_entry_add( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl_rule_add_params *rule = + (struct rte_table_acl_rule_add_params *) key; + struct rte_pipeline_acl_rule acl_rule; + struct rte_acl_rule *rule_location; + struct rte_acl_ctx *ctx; + uint32_t free_pos, free_pos_valid, i; + int status; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + if (key_found == NULL) { + RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n", + __func__); + return -EINVAL; + } + if (entry_ptr == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry_ptr parameter is NULL\n", + __func__); + return -EINVAL; + } + if (rule->priority > RTE_ACL_MAX_PRIORITY) { + RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__); + return -EINVAL; + } + + /* Setup rule data structure */ + memset(&acl_rule, 0, sizeof(acl_rule)); + acl_rule.data.category_mask = 1; + acl_rule.data.priority = RTE_ACL_MAX_PRIORITY - rule->priority; + acl_rule.data.userdata = 0; /* To be set up later */ + memcpy(&acl_rule.field[0], + &rule->field_value[0], + acl->cfg.num_fields * sizeof(struct rte_acl_field)); + + /* Look to see if the rule exists already in the table */ + free_pos = 0; + free_pos_valid = 0; + for (i = 1; i < acl->n_rules; i++) { + if (acl->acl_rule_list[i] == NULL) { + if (free_pos_valid == 0) { + free_pos = i; + free_pos_valid = 1; + } + + continue; + } + + /* Compare the key fields */ + status = memcmp(&acl->acl_rule_list[i]->field[0], + &rule->field_value[0], + acl->cfg.num_fields * sizeof(struct rte_acl_field)); + + /* Rule found: update data associated with the rule */ + if (status == 0) { + *key_found = 1; + *entry_ptr = &acl->memory[i * acl->entry_size]; + memcpy(*entry_ptr, entry, acl->entry_size); + + return 0; + } + } + + /* Return if max rules */ + if (free_pos_valid == 0) { + RTE_LOG(ERR, TABLE, "%s: Max number of rules reached\n", + __func__); + return -ENOSPC; + } + + /* Add the new rule to the rule set */ + acl_rule.data.userdata = free_pos; + rule_location = (struct rte_acl_rule *) + &acl->acl_rule_memory[free_pos * acl->acl_params.rule_size]; + memcpy(rule_location, &acl_rule, acl->acl_params.rule_size); + acl->acl_rule_list[free_pos] = rule_location; + + /* Build low level ACL table */ + acl->name_id ^= 1; + acl->acl_params.name = acl->name[acl->name_id]; + status = rte_table_acl_build(acl, &ctx); + if (status != 0) { + /* Roll back changes */ + acl->acl_rule_list[free_pos] = NULL; + acl->name_id ^= 1; + + return -EINVAL; + } + + /* Commit changes */ + if (acl->ctx != NULL) + rte_acl_free(acl->ctx); + acl->ctx = ctx; + *key_found = 0; + *entry_ptr = &acl->memory[free_pos * acl->entry_size]; + memcpy(*entry_ptr, entry, acl->entry_size); + + return 0; +} + +static int +rte_table_acl_entry_delete( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_table_acl_rule_delete_params *rule = + (struct rte_table_acl_rule_delete_params *) key; + struct rte_acl_rule *deleted_rule = NULL; + struct rte_acl_ctx *ctx; + uint32_t pos, pos_valid, i; + int status; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + if (key_found == NULL) { + RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* Look for the rule in the table */ + pos = 0; + pos_valid = 0; + for (i = 1; i < acl->n_rules; i++) { + if (acl->acl_rule_list[i] != NULL) { + /* Compare the key fields */ + status = memcmp(&acl->acl_rule_list[i]->field[0], + &rule->field_value[0], acl->cfg.num_fields * + sizeof(struct rte_acl_field)); + + /* Rule found: remove from table */ + if (status == 0) { + pos = i; + pos_valid = 1; + + deleted_rule = acl->acl_rule_list[i]; + acl->acl_rule_list[i] = NULL; + } + } + } + + /* Return if rule not found */ + if (pos_valid == 0) { + *key_found = 0; + return 0; + } + + /* Build low level ACL table */ + acl->name_id ^= 1; + acl->acl_params.name = acl->name[acl->name_id]; + status = rte_table_acl_build(acl, &ctx); + if (status != 0) { + /* Roll back changes */ + acl->acl_rule_list[pos] = deleted_rule; + acl->name_id ^= 1; + + return -EINVAL; + } + + /* Commit changes */ + if (acl->ctx != NULL) + rte_acl_free(acl->ctx); + + acl->ctx = ctx; + *key_found = 1; + if (entry != NULL) + memcpy(entry, &acl->memory[pos * acl->entry_size], + acl->entry_size); + + return 0; +} + +static int +rte_table_acl_entry_add_bulk( + void *table, + void **keys, + void **entries, + uint32_t n_keys, + int *key_found, + void **entries_ptr) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_acl_ctx *ctx; + uint32_t rule_pos[n_keys]; + uint32_t i; + int err = 0, build = 0; + int status; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (keys == NULL) { + RTE_LOG(ERR, TABLE, "%s: keys parameter is NULL\n", __func__); + return -EINVAL; + } + if (entries == NULL) { + RTE_LOG(ERR, TABLE, "%s: entries parameter is NULL\n", __func__); + return -EINVAL; + } + if (n_keys == 0) { + RTE_LOG(ERR, TABLE, "%s: 0 rules to add\n", __func__); + return -EINVAL; + } + if (key_found == NULL) { + RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n", + __func__); + return -EINVAL; + } + if (entries_ptr == NULL) { + RTE_LOG(ERR, TABLE, "%s: entries_ptr parameter is NULL\n", + __func__); + return -EINVAL; + } + + /* Check input parameters in arrays */ + for (i = 0; i < n_keys; i++) { + struct rte_table_acl_rule_add_params *rule; + + if (keys[i] == NULL) { + RTE_LOG(ERR, TABLE, "%s: keys[%" PRIu32 "] parameter is NULL\n", + __func__, i); + return -EINVAL; + } + + if (entries[i] == NULL) { + RTE_LOG(ERR, TABLE, "%s: entries[%" PRIu32 "] parameter is NULL\n", + __func__, i); + return -EINVAL; + } + + if (entries_ptr[i] == NULL) { + RTE_LOG(ERR, TABLE, "%s: entries_ptr[%" PRIu32 "] parameter is NULL\n", + __func__, i); + return -EINVAL; + } + + rule = (struct rte_table_acl_rule_add_params *) keys[i]; + if (rule->priority > RTE_ACL_MAX_PRIORITY) { + RTE_LOG(ERR, TABLE, "%s: Priority is too high\n", __func__); + return -EINVAL; + } + } + + memset(rule_pos, 0, n_keys * sizeof(uint32_t)); + memset(key_found, 0, n_keys * sizeof(int)); + for (i = 0; i < n_keys; i++) { + struct rte_table_acl_rule_add_params *rule = + (struct rte_table_acl_rule_add_params *) keys[i]; + struct rte_pipeline_acl_rule acl_rule; + struct rte_acl_rule *rule_location; + uint32_t free_pos, free_pos_valid, j; + + /* Setup rule data structure */ + memset(&acl_rule, 0, sizeof(acl_rule)); + acl_rule.data.category_mask = 1; + acl_rule.data.priority = RTE_ACL_MAX_PRIORITY - rule->priority; + acl_rule.data.userdata = 0; /* To be set up later */ + memcpy(&acl_rule.field[0], + &rule->field_value[0], + acl->cfg.num_fields * sizeof(struct rte_acl_field)); + + /* Look to see if the rule exists already in the table */ + free_pos = 0; + free_pos_valid = 0; + for (j = 1; j < acl->n_rules; j++) { + if (acl->acl_rule_list[j] == NULL) { + if (free_pos_valid == 0) { + free_pos = j; + free_pos_valid = 1; + } + + continue; + } + + /* Compare the key fields */ + status = memcmp(&acl->acl_rule_list[j]->field[0], + &rule->field_value[0], + acl->cfg.num_fields * sizeof(struct rte_acl_field)); + + /* Rule found: update data associated with the rule */ + if (status == 0) { + key_found[i] = 1; + entries_ptr[i] = &acl->memory[j * acl->entry_size]; + memcpy(entries_ptr[i], entries[i], acl->entry_size); + + break; + } + } + + /* Key already in the table */ + if (key_found[i] != 0) + continue; + + /* Maximum number of rules reached */ + if (free_pos_valid == 0) { + err = 1; + break; + } + + /* Add the new rule to the rule set */ + acl_rule.data.userdata = free_pos; + rule_location = (struct rte_acl_rule *) + &acl->acl_rule_memory[free_pos * acl->acl_params.rule_size]; + memcpy(rule_location, &acl_rule, acl->acl_params.rule_size); + acl->acl_rule_list[free_pos] = rule_location; + rule_pos[i] = free_pos; + build = 1; + } + + if (err != 0) { + for (i = 0; i < n_keys; i++) { + if (rule_pos[i] == 0) + continue; + + acl->acl_rule_list[rule_pos[i]] = NULL; + } + + return -ENOSPC; + } + + if (build == 0) + return 0; + + /* Build low level ACL table */ + acl->name_id ^= 1; + acl->acl_params.name = acl->name[acl->name_id]; + status = rte_table_acl_build(acl, &ctx); + if (status != 0) { + /* Roll back changes */ + for (i = 0; i < n_keys; i++) { + if (rule_pos[i] == 0) + continue; + + acl->acl_rule_list[rule_pos[i]] = NULL; + } + acl->name_id ^= 1; + + return -EINVAL; + } + + /* Commit changes */ + if (acl->ctx != NULL) + rte_acl_free(acl->ctx); + acl->ctx = ctx; + + for (i = 0; i < n_keys; i++) { + if (rule_pos[i] == 0) + continue; + + key_found[i] = 0; + entries_ptr[i] = &acl->memory[rule_pos[i] * acl->entry_size]; + memcpy(entries_ptr[i], entries[i], acl->entry_size); + } + + return 0; +} + +static int +rte_table_acl_entry_delete_bulk( + void *table, + void **keys, + uint32_t n_keys, + int *key_found, + void **entries) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + struct rte_acl_rule *deleted_rules[n_keys]; + uint32_t rule_pos[n_keys]; + struct rte_acl_ctx *ctx; + uint32_t i; + int status; + int build = 0; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (keys == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + if (n_keys == 0) { + RTE_LOG(ERR, TABLE, "%s: 0 rules to delete\n", __func__); + return -EINVAL; + } + if (key_found == NULL) { + RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n", + __func__); + return -EINVAL; + } + + for (i = 0; i < n_keys; i++) { + if (keys[i] == NULL) { + RTE_LOG(ERR, TABLE, "%s: keys[%" PRIu32 "] parameter is NULL\n", + __func__, i); + return -EINVAL; + } + } + + memset(deleted_rules, 0, n_keys * sizeof(struct rte_acl_rule *)); + memset(rule_pos, 0, n_keys * sizeof(uint32_t)); + for (i = 0; i < n_keys; i++) { + struct rte_table_acl_rule_delete_params *rule = + (struct rte_table_acl_rule_delete_params *) keys[i]; + uint32_t pos_valid, j; + + /* Look for the rule in the table */ + pos_valid = 0; + for (j = 1; j < acl->n_rules; j++) { + if (acl->acl_rule_list[j] == NULL) + continue; + + /* Compare the key fields */ + status = memcmp(&acl->acl_rule_list[j]->field[0], + &rule->field_value[0], + acl->cfg.num_fields * sizeof(struct rte_acl_field)); + + /* Rule found: remove from table */ + if (status == 0) { + pos_valid = 1; + + deleted_rules[i] = acl->acl_rule_list[j]; + acl->acl_rule_list[j] = NULL; + rule_pos[i] = j; + + build = 1; + } + } + + if (pos_valid == 0) { + key_found[i] = 0; + continue; + } + } + + /* Return if no changes to acl table */ + if (build == 0) { + return 0; + } + + /* Build low level ACL table */ + acl->name_id ^= 1; + acl->acl_params.name = acl->name[acl->name_id]; + status = rte_table_acl_build(acl, &ctx); + if (status != 0) { + /* Roll back changes */ + for (i = 0; i < n_keys; i++) { + if (rule_pos[i] == 0) + continue; + + acl->acl_rule_list[rule_pos[i]] = deleted_rules[i]; + } + + acl->name_id ^= 1; + + return -EINVAL; + } + + /* Commit changes */ + if (acl->ctx != NULL) + rte_acl_free(acl->ctx); + + acl->ctx = ctx; + for (i = 0; i < n_keys; i++) { + if (rule_pos[i] == 0) + continue; + + key_found[i] = 1; + if (entries != NULL && entries[i] != NULL) + memcpy(entries[i], &acl->memory[rule_pos[i] * acl->entry_size], + acl->entry_size); + } + + return 0; +} + +static int +rte_table_acl_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + const uint8_t *pkts_data[RTE_PORT_IN_BURST_SIZE_MAX]; + uint32_t results[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t pkts_out_mask; + uint32_t n_pkts, i, j; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_ACL_STATS_PKTS_IN_ADD(acl, n_pkts_in); + + /* Input conversion */ + for (i = 0, j = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX - + __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + pkts_data[j] = rte_pktmbuf_mtod(pkts[i], uint8_t *); + j++; + } + } + n_pkts = j; + + /* Low-level ACL table lookup */ + if (acl->ctx != NULL) + rte_acl_classify(acl->ctx, pkts_data, results, n_pkts, 1); + else + n_pkts = 0; + + /* Output conversion */ + pkts_out_mask = 0; + for (i = 0; i < n_pkts; i++) { + uint32_t action_table_pos = results[i]; + uint32_t pkt_pos = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_pos; + + pkts_mask &= ~pkt_mask; + + if (action_table_pos != RTE_ACL_INVALID_USERDATA) { + pkts_out_mask |= pkt_mask; + entries[pkt_pos] = (void *) + &acl->memory[action_table_pos * + acl->entry_size]; + rte_prefetch0(entries[pkt_pos]); + } + } + + *lookup_hit_mask = pkts_out_mask; + RTE_TABLE_ACL_STATS_PKTS_LOOKUP_MISS(acl, n_pkts_in - __builtin_popcountll(pkts_out_mask)); + + return 0; +} + +static int +rte_table_acl_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_acl *acl = (struct rte_table_acl *) table; + + if (stats != NULL) + memcpy(stats, &acl->stats, sizeof(acl->stats)); + + if (clear) + memset(&acl->stats, 0, sizeof(acl->stats)); + + return 0; +} + +struct rte_table_ops rte_table_acl_ops = { + .f_create = rte_table_acl_create, + .f_free = rte_table_acl_free, + .f_add = rte_table_acl_entry_add, + .f_delete = rte_table_acl_entry_delete, + .f_add_bulk = rte_table_acl_entry_add_bulk, + .f_delete_bulk = rte_table_acl_entry_delete_bulk, + .f_lookup = rte_table_acl_lookup, + .f_stats = rte_table_acl_stats_read, +}; diff --git a/lib/librte_table/rte_table_acl.h b/lib/librte_table/rte_table_acl.h new file mode 100644 index 00000000..a9cc0328 --- /dev/null +++ b/lib/librte_table/rte_table_acl.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_ACL_H__ +#define __INCLUDE_RTE_TABLE_ACL_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table ACL + * + * This table uses the Access Control List (ACL) algorithm to uniquely + * associate data to lookup keys. + * + * Use-cases: Firewall rule database, etc. + * + ***/ + +#include + +#include "rte_acl.h" + +#include "rte_table.h" + +/** ACL table parameters */ +struct rte_table_acl_params { + /** Name */ + const char *name; + + /** Maximum number of ACL rules in the table */ + uint32_t n_rules; + + /** Number of fields in the ACL rule specification */ + uint32_t n_rule_fields; + + /** Format specification of the fields of the ACL rule */ + struct rte_acl_field_def field_format[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL rule specification for entry add operation */ +struct rte_table_acl_rule_add_params { + /** ACL rule priority, with 0 as the highest priority */ + int32_t priority; + + /** Values for the fields of the ACL rule to be added to the table */ + struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL rule specification for entry delete operation */ +struct rte_table_acl_rule_delete_params { + /** Values for the fields of the ACL rule to be deleted from table */ + struct rte_acl_field field_value[RTE_ACL_MAX_FIELDS]; +}; + +/** ACL table operations */ +extern struct rte_table_ops rte_table_acl_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_array.c b/lib/librte_table/rte_table_array.c new file mode 100644 index 00000000..3bb68d11 --- /dev/null +++ b/lib/librte_table/rte_table_array.c @@ -0,0 +1,237 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_array.h" + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_ARRAY_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_ARRAY_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_table_array { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t entry_size; + uint32_t n_entries; + uint32_t offset; + + /* Internal fields */ + uint32_t entry_pos_mask; + + /* Internal table */ + uint8_t array[0] __rte_cache_aligned; +} __rte_cache_aligned; + +static void * +rte_table_array_create(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_array_params *p = + (struct rte_table_array_params *) params; + struct rte_table_array *t; + uint32_t total_cl_size, total_size; + + /* Check input parameters */ + if ((p == NULL) || + (p->n_entries == 0) || + (!rte_is_power_of_2(p->n_entries))) + return NULL; + + /* Memory allocation */ + total_cl_size = (sizeof(struct rte_table_array) + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_cl_size += (p->n_entries * entry_size + + RTE_CACHE_LINE_SIZE) / RTE_CACHE_LINE_SIZE; + total_size = total_cl_size * RTE_CACHE_LINE_SIZE; + t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for array table\n", + __func__, total_size); + return NULL; + } + + /* Memory initialization */ + t->entry_size = entry_size; + t->n_entries = p->n_entries; + t->offset = p->offset; + t->entry_pos_mask = t->n_entries - 1; + + return t; +} + +static int +rte_table_array_free(void *table) +{ + struct rte_table_array *t = (struct rte_table_array *) table; + + /* Check input parameters */ + if (t == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free previously allocated resources */ + rte_free(t); + + return 0; +} + +static int +rte_table_array_entry_add( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_array *t = (struct rte_table_array *) table; + struct rte_table_array_key *k = (struct rte_table_array_key *) key; + uint8_t *table_entry; + + /* Check input parameters */ + if (table == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (key == NULL) { + RTE_LOG(ERR, TABLE, "%s: key parameter is NULL\n", __func__); + return -EINVAL; + } + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + if (key_found == NULL) { + RTE_LOG(ERR, TABLE, "%s: key_found parameter is NULL\n", + __func__); + return -EINVAL; + } + if (entry_ptr == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry_ptr parameter is NULL\n", + __func__); + return -EINVAL; + } + + table_entry = &t->array[k->pos * t->entry_size]; + memcpy(table_entry, entry, t->entry_size); + *key_found = 1; + *entry_ptr = (void *) table_entry; + + return 0; +} + +static int +rte_table_array_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_array *t = (struct rte_table_array *) table; + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_ARRAY_STATS_PKTS_IN_ADD(t, n_pkts_in); + *lookup_hit_mask = pkts_mask; + + if ((pkts_mask & (pkts_mask + 1)) == 0) { + uint64_t n_pkts = __builtin_popcountll(pkts_mask); + uint32_t i; + + for (i = 0; i < n_pkts; i++) { + struct rte_mbuf *pkt = pkts[i]; + uint32_t entry_pos = RTE_MBUF_METADATA_UINT32(pkt, + t->offset) & t->entry_pos_mask; + + entries[i] = (void *) &t->array[entry_pos * + t->entry_size]; + } + } else { + for ( ; pkts_mask; ) { + uint32_t pkt_index = __builtin_ctzll(pkts_mask); + uint64_t pkt_mask = 1LLU << pkt_index; + struct rte_mbuf *pkt = pkts[pkt_index]; + uint32_t entry_pos = RTE_MBUF_METADATA_UINT32(pkt, + t->offset) & t->entry_pos_mask; + + entries[pkt_index] = (void *) &t->array[entry_pos * + t->entry_size]; + pkts_mask &= ~pkt_mask; + } + } + + return 0; +} + +static int +rte_table_array_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_array *array = (struct rte_table_array *) table; + + if (stats != NULL) + memcpy(stats, &array->stats, sizeof(array->stats)); + + if (clear) + memset(&array->stats, 0, sizeof(array->stats)); + + return 0; +} + +struct rte_table_ops rte_table_array_ops = { + .f_create = rte_table_array_create, + .f_free = rte_table_array_free, + .f_add = rte_table_array_entry_add, + .f_delete = NULL, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_array_lookup, + .f_stats = rte_table_array_stats_read, +}; diff --git a/lib/librte_table/rte_table_array.h b/lib/librte_table/rte_table_array.h new file mode 100644 index 00000000..9521119e --- /dev/null +++ b/lib/librte_table/rte_table_array.h @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_ARRAY_H__ +#define __INCLUDE_RTE_TABLE_ARRAY_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Array + * + * Simple array indexing. Lookup key is the array entry index. + * + ***/ + +#include + +#include "rte_table.h" + +/** Array table parameters */ +struct rte_table_array_params { + /** Number of array entries. Has to be a power of two. */ + uint32_t n_entries; + + /** Byte offset within input packet meta-data where lookup key (i.e. the + array entry index) is located. */ + uint32_t offset; +}; + +/** Array table key format */ +struct rte_table_array_key { + /** Array entry index */ + uint32_t pos; +}; + +/** Array table operations */ +extern struct rte_table_ops rte_table_array_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_hash.h b/lib/librte_table/rte_table_hash.h new file mode 100644 index 00000000..9d17516a --- /dev/null +++ b/lib/librte_table/rte_table_hash.h @@ -0,0 +1,370 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_HASH_H__ +#define __INCLUDE_RTE_TABLE_HASH_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Hash + * + * These tables use the exact match criterion to uniquely associate data to + * lookup keys. + * + * Use-cases: Flow classification table, Address Resolution Protocol (ARP) table + * + * Hash table types: + * 1. Entry add strategy on bucket full: + * a. Least Recently Used (LRU): One of the existing keys in the bucket is + * deleted and the new key is added in its place. The number of keys in + * each bucket never grows bigger than 4. The logic to pick the key to + * be dropped from the bucket is LRU. The hash table lookup operation + * maintains the order in which the keys in the same bucket are hit, so + * every time a key is hit, it becomes the new Most Recently Used (MRU) + * key, i.e. the most unlikely candidate for drop. When a key is added + * to the bucket, it also becomes the new MRU key. When a key needs to + * be picked and dropped, the most likely candidate for drop, i.e. the + * current LRU key, is always picked. The LRU logic requires maintaining + * specific data structures per each bucket. + * b. Extendible bucket (ext): The bucket is extended with space for 4 more + * keys. This is done by allocating additional memory at table init time, + * which is used to create a pool of free keys (the size of this pool is + * configurable and always a multiple of 4). On key add operation, the + * allocation of a group of 4 keys only happens successfully within the + * limit of free keys, otherwise the key add operation fails. On key + * delete operation, a group of 4 keys is freed back to the pool of free + * keys when the key to be deleted is the only key that was used within + * its group of 4 keys at that time. On key lookup operation, if the + * current bucket is in extended state and a match is not found in the + * first group of 4 keys, the search continues beyond the first group of + * 4 keys, potentially until all keys in this bucket are examined. The + * extendible bucket logic requires maintaining specific data structures + * per table and per each bucket. + * 2. Key signature computation: + * a. Pre-computed key signature: The key lookup operation is split between + * two CPU cores. The first CPU core (typically the CPU core performing + * packet RX) extracts the key from the input packet, computes the key + * signature and saves both the key and the key signature in the packet + * buffer as packet meta-data. The second CPU core reads both the key and + * the key signature from the packet meta-data and performs the bucket + * search step of the key lookup operation. + * b. Key signature computed on lookup (do-sig): The same CPU core reads + * the key from the packet meta-data, uses it to compute the key + * signature and also performs the bucket search step of the key lookup + * operation. + * 3. Key size: + * a. Configurable key size + * b. Single key size (8-byte, 16-byte or 32-byte key size) + * + ***/ +#include + +#include "rte_table.h" + +/** Hash function */ +typedef uint64_t (*rte_table_hash_op_hash)( + void *key, + uint32_t key_size, + uint64_t seed); + +/** + * Hash tables with configurable key size + * + */ +/** Extendible bucket hash table parameters */ +struct rte_table_hash_ext_params { + /** Key size (number of bytes) */ + uint32_t key_size; + + /** Maximum number of keys */ + uint32_t n_keys; + + /** Number of hash table buckets. Each bucket stores up to 4 keys. */ + uint32_t n_buckets; + + /** Number of hash table bucket extensions. Each bucket extension has + space for 4 keys and each bucket can have 0, 1 or more extensions. */ + uint32_t n_buckets_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed value for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** Extendible bucket hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_ext_dosig_ops; + +/** LRU hash table parameters */ +struct rte_table_hash_lru_params { + /** Key size (number of bytes) */ + uint32_t key_size; + + /** Maximum number of keys */ + uint32_t n_keys; + + /** Number of hash table buckets. Each bucket stores up to 4 keys. */ + uint32_t n_buckets; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed value for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_lru_ops; + +/** LRU hash table operations for key signature computed on lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_lru_dosig_ops; + +/** + * 8-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key8_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key8_lru_ops; + +/** LRU hash table operations for key signature computed on lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key8_lru_dosig_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key8_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. */ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** Extendible bucket hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key8_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key8_ext_dosig_ops; + +/** + * 16-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key16_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key16_lru_ops; + +/** LRU hash table operations for key signature computed on lookup + ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_lru_dosig_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key16_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. */ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; + + /** Bit-mask to be AND-ed to the key on lookup */ + uint8_t *key_mask; +}; + +/** Extendible bucket operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key16_ext_ops; + +/** Extendible bucket hash table operations for key signature computed on + lookup ("do-sig") */ +extern struct rte_table_ops rte_table_hash_key16_ext_dosig_ops; + +/** + * 32-byte key hash tables + * + */ +/** LRU hash table parameters */ +struct rte_table_hash_key32_lru_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** LRU hash table operations for pre-computed key signature */ +extern struct rte_table_ops rte_table_hash_key32_lru_ops; + +/** Extendible bucket hash table parameters */ +struct rte_table_hash_key32_ext_params { + /** Maximum number of entries (and keys) in the table */ + uint32_t n_entries; + + /** Number of entries (and keys) for hash table bucket extensions. Each + bucket is extended in increments of 4 keys. */ + uint32_t n_entries_ext; + + /** Hash function */ + rte_table_hash_op_hash f_hash; + + /** Seed for the hash function */ + uint64_t seed; + + /** Byte offset within packet meta-data where the 4-byte key signature + is located. Valid for pre-computed key signature tables, ignored for + do-sig tables. */ + uint32_t signature_offset; + + /** Byte offset within packet meta-data where the key is located */ + uint32_t key_offset; +}; + +/** Extendible bucket hash table operations */ +extern struct rte_table_ops rte_table_hash_key32_ext_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_hash_ext.c b/lib/librte_table/rte_table_hash_ext.c new file mode 100644 index 00000000..e283a3d1 --- /dev/null +++ b/lib/librte_table/rte_table_hash_ext.c @@ -0,0 +1,1159 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_hash.h" + +#define KEYS_PER_BUCKET 4 + +struct bucket { + union { + uintptr_t next; + uint64_t lru_list; + }; + uint16_t sig[KEYS_PER_BUCKET]; + uint32_t key_pos[KEYS_PER_BUCKET]; +}; + +#define BUCKET_NEXT(bucket) \ + ((void *) ((bucket)->next & (~1LU))) + +#define BUCKET_NEXT_VALID(bucket) \ + ((bucket)->next & 1LU) + +#define BUCKET_NEXT_SET(bucket, bucket_next) \ +do \ + (bucket)->next = (((uintptr_t) ((void *) (bucket_next))) | 1LU);\ +while (0) + +#define BUCKET_NEXT_SET_NULL(bucket) \ +do \ + (bucket)->next = 0; \ +while (0) + +#define BUCKET_NEXT_COPY(bucket, bucket2) \ +do \ + (bucket)->next = (bucket2)->next; \ +while (0) + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct grinder { + struct bucket *bkt; + uint64_t sig; + uint64_t match; + uint32_t key_index; +}; + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t key_size; + uint32_t entry_size; + uint32_t n_keys; + uint32_t n_buckets; + uint32_t n_buckets_ext; + rte_table_hash_op_hash f_hash; + uint64_t seed; + uint32_t signature_offset; + uint32_t key_offset; + + /* Internal */ + uint64_t bucket_mask; + uint32_t key_size_shl; + uint32_t data_size_shl; + uint32_t key_stack_tos; + uint32_t bkt_ext_stack_tos; + + /* Grinder */ + struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX]; + + /* Tables */ + struct bucket *buckets; + struct bucket *buckets_ext; + uint8_t *key_mem; + uint8_t *data_mem; + uint32_t *key_stack; + uint32_t *bkt_ext_stack; + + /* Table memory */ + uint8_t memory[0] __rte_cache_aligned; +}; + +static int +check_params_create(struct rte_table_hash_ext_params *params) +{ + uint32_t n_buckets_min; + + /* key_size */ + if ((params->key_size == 0) || + (!rte_is_power_of_2(params->key_size))) { + RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); + return -EINVAL; + } + + /* n_keys */ + if ((params->n_keys == 0) || + (!rte_is_power_of_2(params->n_keys))) { + RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__); + return -EINVAL; + } + + /* n_buckets */ + n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys; + if ((params->n_buckets == 0) || + (!rte_is_power_of_2(params->n_keys)) || + (params->n_buckets < n_buckets_min)) { + RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_ext_create(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_hash_ext_params *p = + (struct rte_table_hash_ext_params *) params; + struct rte_table_hash *t; + uint32_t total_size, table_meta_sz; + uint32_t bucket_sz, bucket_ext_sz, key_sz; + uint32_t key_stack_sz, bkt_ext_stack_sz, data_sz; + uint32_t bucket_offset, bucket_ext_offset, key_offset; + uint32_t key_stack_offset, bkt_ext_stack_offset, data_offset; + uint32_t i; + + /* Check input parameters */ + if ((check_params_create(p) != 0) || + (!rte_is_power_of_2(entry_size)) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + (sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) + return NULL; + + /* Memory allocation */ + table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash)); + bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket)); + bucket_ext_sz = + RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(struct bucket)); + key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size); + key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t)); + bkt_ext_stack_sz = + RTE_CACHE_LINE_ROUNDUP(p->n_buckets_ext * sizeof(uint32_t)); + data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size); + total_size = table_meta_sz + bucket_sz + bucket_ext_sz + key_sz + + key_stack_sz + bkt_ext_stack_sz + data_sz; + + t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is " + "%u bytes\n", __func__, p->key_size, total_size); + + /* Memory initialization */ + t->key_size = p->key_size; + t->entry_size = entry_size; + t->n_keys = p->n_keys; + t->n_buckets = p->n_buckets; + t->n_buckets_ext = p->n_buckets_ext; + t->f_hash = p->f_hash; + t->seed = p->seed; + t->signature_offset = p->signature_offset; + t->key_offset = p->key_offset; + + /* Internal */ + t->bucket_mask = t->n_buckets - 1; + t->key_size_shl = __builtin_ctzl(p->key_size); + t->data_size_shl = __builtin_ctzl(entry_size); + + /* Tables */ + bucket_offset = 0; + bucket_ext_offset = bucket_offset + bucket_sz; + key_offset = bucket_ext_offset + bucket_ext_sz; + key_stack_offset = key_offset + key_sz; + bkt_ext_stack_offset = key_stack_offset + key_stack_sz; + data_offset = bkt_ext_stack_offset + bkt_ext_stack_sz; + + t->buckets = (struct bucket *) &t->memory[bucket_offset]; + t->buckets_ext = (struct bucket *) &t->memory[bucket_ext_offset]; + t->key_mem = &t->memory[key_offset]; + t->key_stack = (uint32_t *) &t->memory[key_stack_offset]; + t->bkt_ext_stack = (uint32_t *) &t->memory[bkt_ext_stack_offset]; + t->data_mem = &t->memory[data_offset]; + + /* Key stack */ + for (i = 0; i < t->n_keys; i++) + t->key_stack[i] = t->n_keys - 1 - i; + t->key_stack_tos = t->n_keys; + + /* Bucket ext stack */ + for (i = 0; i < t->n_buckets_ext; i++) + t->bkt_ext_stack[i] = t->n_buckets_ext - 1 - i; + t->bkt_ext_stack_tos = t->n_buckets_ext; + + return t; +} + +static int +rte_table_hash_ext_free(void *table) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (t == NULL) + return -EINVAL; + + rte_free(t); + return 0; +} + +static int +rte_table_hash_ext_entry_add(void *table, void *key, void *entry, + int *key_found, void **entry_ptr) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct bucket *bkt0, *bkt, *bkt_prev; + uint64_t sig; + uint32_t bkt_index, i; + + sig = t->f_hash(key, t->key_size, t->seed); + bkt_index = sig & t->bucket_mask; + bkt0 = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (bkt = bkt0; bkt != NULL; bkt = BUCKET_NEXT(bkt)) + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = + &t->key_mem[bkt_key_index << t->key_size_shl]; + + if ((sig == bkt_sig) && (memcmp(key, bkt_key, + t->key_size) == 0)) { + uint8_t *data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + memcpy(data, entry, t->entry_size); + *key_found = 1; + *entry_ptr = (void *) data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (bkt_prev = NULL, bkt = bkt0; bkt != NULL; bkt_prev = bkt, + bkt = BUCKET_NEXT(bkt)) + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + + if (bkt_sig == 0) { + uint32_t bkt_key_index; + uint8_t *bkt_key, *data; + + /* Allocate new key */ + if (t->key_stack_tos == 0) /* No free keys */ + return -ENOSPC; + + bkt_key_index = t->key_stack[ + --t->key_stack_tos]; + + /* Install new key */ + bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + bkt->sig[i] = (uint16_t) sig; + bkt->key_pos[i] = bkt_key_index; + memcpy(bkt_key, key, t->key_size); + memcpy(data, entry, t->entry_size); + + *key_found = 0; + *entry_ptr = (void *) data; + return 0; + } + } + + /* Bucket full: extend bucket */ + if ((t->bkt_ext_stack_tos > 0) && (t->key_stack_tos > 0)) { + uint32_t bkt_key_index; + uint8_t *bkt_key, *data; + + /* Allocate new bucket ext */ + bkt_index = t->bkt_ext_stack[--t->bkt_ext_stack_tos]; + bkt = &t->buckets_ext[bkt_index]; + + /* Chain the new bucket ext */ + BUCKET_NEXT_SET(bkt_prev, bkt); + BUCKET_NEXT_SET_NULL(bkt); + + /* Allocate new key */ + bkt_key_index = t->key_stack[--t->key_stack_tos]; + bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; + + data = &t->data_mem[bkt_key_index << t->data_size_shl]; + + /* Install new key into bucket */ + bkt->sig[0] = (uint16_t) sig; + bkt->key_pos[0] = bkt_key_index; + memcpy(bkt_key, key, t->key_size); + memcpy(data, entry, t->entry_size); + + *key_found = 0; + *entry_ptr = (void *) data; + return 0; + } + + return -ENOSPC; +} + +static int +rte_table_hash_ext_entry_delete(void *table, void *key, int *key_found, +void *entry) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct bucket *bkt0, *bkt, *bkt_prev; + uint64_t sig; + uint32_t bkt_index, i; + + sig = t->f_hash(key, t->key_size, t->seed); + bkt_index = sig & t->bucket_mask; + bkt0 = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (bkt_prev = NULL, bkt = bkt0; bkt != NULL; bkt_prev = bkt, + bkt = BUCKET_NEXT(bkt)) + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + + if ((sig == bkt_sig) && (memcmp(key, bkt_key, + t->key_size) == 0)) { + uint8_t *data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + /* Uninstall key from bucket */ + bkt->sig[i] = 0; + *key_found = 1; + if (entry) + memcpy(entry, data, t->entry_size); + + /* Free key */ + t->key_stack[t->key_stack_tos++] = + bkt_key_index; + + /*Check if bucket is unused */ + if ((bkt_prev != NULL) && + (bkt->sig[0] == 0) && (bkt->sig[1] == 0) && + (bkt->sig[2] == 0) && (bkt->sig[3] == 0)) { + /* Unchain bucket */ + BUCKET_NEXT_COPY(bkt_prev, bkt); + + /* Clear bucket */ + memset(bkt, 0, sizeof(struct bucket)); + + /* Free bucket back to buckets ext */ + bkt_index = bkt - t->buckets_ext; + t->bkt_ext_stack[t->bkt_ext_stack_tos++] + = bkt_index; + } + + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +static int rte_table_hash_ext_lookup_unoptimized( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries, + int dosig) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); + + for ( ; pkts_mask; ) { + struct bucket *bkt0, *bkt; + struct rte_mbuf *pkt; + uint8_t *key; + uint64_t pkt_mask, sig; + uint32_t pkt_index, bkt_index, i; + + pkt_index = __builtin_ctzll(pkts_mask); + pkt_mask = 1LLU << pkt_index; + pkts_mask &= ~pkt_mask; + + pkt = pkts[pkt_index]; + key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset); + if (dosig) + sig = (uint64_t) t->f_hash(key, t->key_size, t->seed); + else + sig = RTE_MBUF_METADATA_UINT32(pkt, + t->signature_offset); + + bkt_index = sig & t->bucket_mask; + bkt0 = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (bkt = bkt0; bkt != NULL; bkt = BUCKET_NEXT(bkt)) + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + + if ((sig == bkt_sig) && (memcmp(key, bkt_key, + t->key_size) == 0)) { + uint8_t *data = &t->data_mem[ + bkt_key_index << t->data_size_shl]; + + pkts_mask_out |= pkt_mask; + entries[pkt_index] = (void *) data; + break; + } + } + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} + +/*** + * + * mask = match bitmask + * match = at least one match + * match_many = more than one match + * match_pos = position of first match + * + *---------------------------------------- + * mask match match_many match_pos + *---------------------------------------- + * 0000 0 0 00 + * 0001 1 0 00 + * 0010 1 0 01 + * 0011 1 1 00 + *---------------------------------------- + * 0100 1 0 10 + * 0101 1 1 00 + * 0110 1 1 01 + * 0111 1 1 00 + *---------------------------------------- + * 1000 1 0 11 + * 1001 1 1 00 + * 1010 1 1 01 + * 1011 1 1 00 + *---------------------------------------- + * 1100 1 1 10 + * 1101 1 1 00 + * 1110 1 1 01 + * 1111 1 1 00 + *---------------------------------------- + * + * match = 1111_1111_1111_1110 + * match_many = 1111_1110_1110_1000 + * match_pos = 0001_0010_0001_0011__0001_0010_0001_0000 + * + * match = 0xFFFELLU + * match_many = 0xFEE8LLU + * match_pos = 0x12131210LLU + * + ***/ + +#define LUT_MATCH 0xFFFELLU +#define LUT_MATCH_MANY 0xFEE8LLU +#define LUT_MATCH_POS 0x12131210LLU + +#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos) \ +{ \ + uint64_t bucket_sig[4], mask[4], mask_all; \ + \ + bucket_sig[0] = bucket->sig[0]; \ + bucket_sig[1] = bucket->sig[1]; \ + bucket_sig[2] = bucket->sig[2]; \ + bucket_sig[3] = bucket->sig[3]; \ + \ + bucket_sig[0] ^= mbuf_sig; \ + bucket_sig[1] ^= mbuf_sig; \ + bucket_sig[2] ^= mbuf_sig; \ + bucket_sig[3] ^= mbuf_sig; \ + \ + mask[0] = 0; \ + mask[1] = 0; \ + mask[2] = 0; \ + mask[3] = 0; \ + \ + if (bucket_sig[0] == 0) \ + mask[0] = 1; \ + if (bucket_sig[1] == 0) \ + mask[1] = 2; \ + if (bucket_sig[2] == 0) \ + mask[2] = 4; \ + if (bucket_sig[3] == 0) \ + mask[3] = 8; \ + \ + mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]); \ + \ + match = (LUT_MATCH >> mask_all) & 1; \ + match_many = (LUT_MATCH_MANY >> mask_all) & 1; \ + match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3; \ +} + +#define lookup_cmp_key(mbuf, key, match_key, f) \ +{ \ + uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\ + uint64_t *bkt_key = (uint64_t *) key; \ + \ + switch (f->key_size) { \ + case 8: \ + { \ + uint64_t xor = pkt_key[0] ^ bkt_key[0]; \ + match_key = 0; \ + if (xor == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 16: \ + { \ + uint64_t xor[2], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + or = xor[0] | xor[1]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 32: \ + { \ + uint64_t xor[4], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + xor[2] = pkt_key[2] ^ bkt_key[2]; \ + xor[3] = pkt_key[3] ^ bkt_key[3]; \ + or = xor[0] | xor[1] | xor[2] | xor[3]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 64: \ + { \ + uint64_t xor[8], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + xor[2] = pkt_key[2] ^ bkt_key[2]; \ + xor[3] = pkt_key[3] ^ bkt_key[3]; \ + xor[4] = pkt_key[4] ^ bkt_key[4]; \ + xor[5] = pkt_key[5] ^ bkt_key[5]; \ + xor[6] = pkt_key[6] ^ bkt_key[6]; \ + xor[7] = pkt_key[7] ^ bkt_key[7]; \ + or = xor[0] | xor[1] | xor[2] | xor[3] | \ + xor[4] | xor[5] | xor[6] | xor[7]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + default: \ + match_key = 0; \ + if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \ + match_key = 1; \ + } \ +} + +#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + struct rte_mbuf *mbuf00, *mbuf01; \ + uint32_t key_offset = t->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + mbuf00 = pkts[pkt00_index]; \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + mbuf01 = pkts[pkt01_index]; \ + \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \ + pkt01_index) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + struct rte_mbuf *mbuf00, *mbuf01; \ + uint32_t key_offset = t->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + mbuf00 = pkts[pkt00_index]; \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + if (pkts_mask == 0) \ + pkt01_index = pkt00_index; \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + mbuf01 = pkts[pkt01_index]; \ + \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \ +{ \ + struct grinder *g10, *g11; \ + uint64_t sig10, sig11, bkt10_index, bkt11_index; \ + struct rte_mbuf *mbuf10, *mbuf11; \ + struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ + uint64_t bucket_mask = t->bucket_mask; \ + uint32_t signature_offset = t->signature_offset; \ + \ + mbuf10 = pkts[pkt10_index]; \ + sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\ + bkt10_index = sig10 & bucket_mask; \ + bkt10 = &buckets[bkt10_index]; \ + \ + mbuf11 = pkts[pkt11_index]; \ + sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\ + bkt11_index = sig11 & bucket_mask; \ + bkt11 = &buckets[bkt11_index]; \ + \ + rte_prefetch0(bkt10); \ + rte_prefetch0(bkt11); \ + \ + g10 = &g[pkt10_index]; \ + g10->sig = sig10; \ + g10->bkt = bkt10; \ + \ + g11 = &g[pkt11_index]; \ + g11->sig = sig11; \ + g11->bkt = bkt11; \ +} + +#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index) \ +{ \ + struct grinder *g10, *g11; \ + uint64_t sig10, sig11, bkt10_index, bkt11_index; \ + struct rte_mbuf *mbuf10, *mbuf11; \ + struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ + uint8_t *key10, *key11; \ + uint64_t bucket_mask = t->bucket_mask; \ + rte_table_hash_op_hash f_hash = t->f_hash; \ + uint64_t seed = t->seed; \ + uint32_t key_size = t->key_size; \ + uint32_t key_offset = t->key_offset; \ + \ + mbuf10 = pkts[pkt10_index]; \ + key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset); \ + sig10 = (uint64_t) f_hash(key10, key_size, seed); \ + bkt10_index = sig10 & bucket_mask; \ + bkt10 = &buckets[bkt10_index]; \ + \ + mbuf11 = pkts[pkt11_index]; \ + key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset); \ + sig11 = (uint64_t) f_hash(key11, key_size, seed); \ + bkt11_index = sig11 & bucket_mask; \ + bkt11 = &buckets[bkt11_index]; \ + \ + rte_prefetch0(bkt10); \ + rte_prefetch0(bkt11); \ + \ + g10 = &g[pkt10_index]; \ + g10->sig = sig10; \ + g10->bkt = bkt10; \ + \ + g11 = &g[pkt11_index]; \ + g11->sig = sig11; \ + g11->bkt = bkt11; \ +} + +#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\ +{ \ + struct grinder *g20, *g21; \ + uint64_t sig20, sig21; \ + struct bucket *bkt20, *bkt21; \ + uint8_t *key20, *key21, *key_mem = t->key_mem; \ + uint64_t match20, match21, match_many20, match_many21; \ + uint64_t match_pos20, match_pos21; \ + uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\ + \ + g20 = &g[pkt20_index]; \ + sig20 = g20->sig; \ + bkt20 = g20->bkt; \ + sig20 = (sig20 >> 16) | 1LLU; \ + lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\ + match20 <<= pkt20_index; \ + match_many20 |= BUCKET_NEXT_VALID(bkt20); \ + match_many20 <<= pkt20_index; \ + key20_index = bkt20->key_pos[match_pos20]; \ + key20 = &key_mem[key20_index << key_size_shl]; \ + \ + g21 = &g[pkt21_index]; \ + sig21 = g21->sig; \ + bkt21 = g21->bkt; \ + sig21 = (sig21 >> 16) | 1LLU; \ + lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\ + match21 <<= pkt21_index; \ + match_many21 |= BUCKET_NEXT_VALID(bkt21); \ + match_many21 <<= pkt21_index; \ + key21_index = bkt21->key_pos[match_pos21]; \ + key21 = &key_mem[key21_index << key_size_shl]; \ + \ + rte_prefetch0(key20); \ + rte_prefetch0(key21); \ + \ + pkts_mask_match_many |= match_many20 | match_many21; \ + \ + g20->match = match20; \ + g20->key_index = key20_index; \ + \ + g21->match = match21; \ + g21->key_index = key21_index; \ +} + +#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \ + entries) \ +{ \ + struct grinder *g30, *g31; \ + struct rte_mbuf *mbuf30, *mbuf31; \ + uint8_t *key30, *key31, *key_mem = t->key_mem; \ + uint8_t *data30, *data31, *data_mem = t->data_mem; \ + uint64_t match30, match31, match_key30, match_key31, match_keys;\ + uint32_t key30_index, key31_index; \ + uint32_t key_size_shl = t->key_size_shl; \ + uint32_t data_size_shl = t->data_size_shl; \ + \ + mbuf30 = pkts[pkt30_index]; \ + g30 = &g[pkt30_index]; \ + match30 = g30->match; \ + key30_index = g30->key_index; \ + key30 = &key_mem[key30_index << key_size_shl]; \ + lookup_cmp_key(mbuf30, key30, match_key30, t); \ + match_key30 <<= pkt30_index; \ + match_key30 &= match30; \ + data30 = &data_mem[key30_index << data_size_shl]; \ + entries[pkt30_index] = data30; \ + \ + mbuf31 = pkts[pkt31_index]; \ + g31 = &g[pkt31_index]; \ + match31 = g31->match; \ + key31_index = g31->key_index; \ + key31 = &key_mem[key31_index << key_size_shl]; \ + lookup_cmp_key(mbuf31, key31, match_key31, t); \ + match_key31 <<= pkt31_index; \ + match_key31 &= match31; \ + data31 = &data_mem[key31_index << data_size_shl]; \ + entries[pkt31_index] = data31; \ + \ + rte_prefetch0(data30); \ + rte_prefetch0(data31); \ + \ + match_keys = match_key30 | match_key31; \ + pkts_mask_out |= match_keys; \ +} + +/*** +* The lookup function implements a 4-stage pipeline, with each stage processing +* two different packets. The purpose of pipelined implementation is to hide the +* latency of prefetching the data structures and loosen the data dependency +* between instructions. +* +* p00 _______ p10 _______ p20 _______ p30 _______ +*----->| |----->| |----->| |----->| |-----> +* | 0 | | 1 | | 2 | | 3 | +*----->|_______|----->|_______|----->|_______|----->|_______|-----> +* p01 p11 p21 p31 +* +* The naming convention is: +* pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1 +* +***/ +static int rte_table_hash_ext_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct grinder *g = t->grinders; + uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; + uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; + uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; + int status = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); + + /* Cannot run the pipeline with less than 7 packets */ + if (__builtin_popcountll(pkts_mask) < 7) + return rte_table_hash_ext_lookup_unoptimized(table, pkts, + pkts_mask, lookup_hit_mask, entries, 0); + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline feed */ + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline feed */ + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, + pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, + pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, + pkts_mask_out, entries); + } + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Slow path */ + pkts_mask_match_many &= ~pkts_mask_out; + if (pkts_mask_match_many) { + uint64_t pkts_mask_out_slow = 0; + + status = rte_table_hash_ext_lookup_unoptimized(table, pkts, + pkts_mask_match_many, &pkts_mask_out_slow, entries, 0); + pkts_mask_out |= pkts_mask_out_slow; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return status; +} + +static int rte_table_hash_ext_lookup_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct grinder *g = t->grinders; + uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; + uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; + uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; + int status = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_EXT_STATS_PKTS_IN_ADD(t, n_pkts_in); + + /* Cannot run the pipeline with less than 7 packets */ + if (__builtin_popcountll(pkts_mask) < 7) + return rte_table_hash_ext_lookup_unoptimized(table, pkts, + pkts_mask, lookup_hit_mask, entries, 1); + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline feed */ + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline feed */ + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, + pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, + pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, + pkts_mask_out, entries); + } + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Slow path */ + pkts_mask_match_many &= ~pkts_mask_out; + if (pkts_mask_match_many) { + uint64_t pkts_mask_out_slow = 0; + + status = rte_table_hash_ext_lookup_unoptimized(table, pkts, + pkts_mask_match_many, &pkts_mask_out_slow, entries, 1); + pkts_mask_out |= pkts_mask_out_slow; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_EXT_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return status; +} + +static int +rte_table_hash_ext_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_ext_ops = { + .f_create = rte_table_hash_ext_create, + .f_free = rte_table_hash_ext_free, + .f_add = rte_table_hash_ext_entry_add, + .f_delete = rte_table_hash_ext_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_ext_lookup, + .f_stats = rte_table_hash_ext_stats_read, +}; + +struct rte_table_ops rte_table_hash_ext_dosig_ops = { + .f_create = rte_table_hash_ext_create, + .f_free = rte_table_hash_ext_free, + .f_add = rte_table_hash_ext_entry_add, + .f_delete = rte_table_hash_ext_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_ext_lookup_dosig, + .f_stats = rte_table_hash_ext_stats_read, +}; diff --git a/lib/librte_table/rte_table_hash_key16.c b/lib/librte_table/rte_table_hash_key16.c new file mode 100644 index 00000000..b7e000fd --- /dev/null +++ b/lib/librte_table/rte_table_hash_key16.c @@ -0,0 +1,1515 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_hash.h" +#include "rte_lru.h" + +#define RTE_TABLE_HASH_KEY_SIZE 16 + +#define RTE_BUCKET_ENTRY_VALID 0x1LLU + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_bucket_4_16 { + /* Cache line 0 */ + uint64_t signature[4 + 1]; + uint64_t lru_list; + struct rte_bucket_4_16 *next; + uint64_t next_valid; + + /* Cache line 1 */ + uint64_t key[4][2]; + + /* Cache line 2 */ + uint8_t data[0]; +}; + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t n_buckets; + uint32_t n_entries_per_bucket; + uint32_t key_size; + uint32_t entry_size; + uint32_t bucket_size; + uint32_t signature_offset; + uint32_t key_offset; + uint64_t key_mask[2]; + rte_table_hash_op_hash f_hash; + uint64_t seed; + + /* Extendible buckets */ + uint32_t n_buckets_ext; + uint32_t stack_pos; + uint32_t *stack; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; +}; + +static int +check_params_create_lru(struct rte_table_hash_key16_lru_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, + "%s: f_hash function pointer is NULL\n", __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key16_lru(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_table_hash_key16_lru_params *p = + (struct rte_table_hash_key16_lru_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_entries_per_bucket, + key_size, bucket_size_cl, total_size, i; + + /* Check input parameters */ + if ((check_params_create_lru(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + return NULL; + n_entries_per_bucket = 4; + key_size = 16; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket + * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + n_buckets * + bucket_size_cl * RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", + __func__, total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + if (p->key_mask != NULL) { + f->key_mask[0] = ((uint64_t *)p->key_mask)[0]; + f->key_mask[1] = ((uint64_t *)p->key_mask)[1]; + } else { + f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU; + } + + for (i = 0; i < n_buckets; i++) { + struct rte_bucket_4_16 *bucket; + + bucket = (struct rte_bucket_4_16 *) &f->memory[i * + f->bucket_size]; + lru_init(bucket); + } + + return f; +} + +static int +rte_table_hash_free_key16_lru(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key16_lru( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket; + uint64_t signature, pos; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_16 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if (bucket_signature == 0) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature[i] = signature; + memcpy(bucket_key, key, f->key_size); + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + + /* Bucket full: replace LRU entry */ + pos = lru_pos(bucket); + bucket->signature[pos] = signature; + memcpy(bucket->key[pos], key, f->key_size); + memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); + lru_update(bucket, pos); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[pos * f->entry_size]; + + return 0; +} + +static int +rte_table_hash_entry_delete_key16_lru( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_16 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature[i] = 0; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, f->entry_size); + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +static int +check_params_create_ext(struct rte_table_hash_key16_ext_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* n_entries_ext */ + if (params->n_entries_ext == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, + "%s: f_hash function pointer is NULL\n", __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key16_ext(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_table_hash_key16_ext_params *p = + (struct rte_table_hash_key16_ext_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size, + bucket_size_cl, stack_size_cl, total_size, i; + + /* Check input parameters */ + if ((check_params_create_ext(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_16) % RTE_CACHE_LINE_SIZE) != 0)) + return NULL; + + n_entries_per_bucket = 4; + key_size = 16; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / + n_entries_per_bucket; + bucket_size_cl = (sizeof(struct rte_bucket_4_16) + n_entries_per_bucket + * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) + / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + + ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) * + RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", + __func__, total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + f->n_buckets_ext = n_buckets_ext; + f->stack_pos = n_buckets_ext; + f->stack = (uint32_t *) + &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; + + for (i = 0; i < n_buckets_ext; i++) + f->stack[i] = i; + + if (p->key_mask != NULL) { + f->key_mask[0] = (((uint64_t *)p->key_mask)[0]); + f->key_mask[1] = (((uint64_t *)p->key_mask)[1]); + } else { + f->key_mask[0] = 0xFFFFFFFFFFFFFFFFLLU; + f->key_mask[1] = 0xFFFFFFFFFFFFFFFFLLU; + } + + return f; +} + +static int +rte_table_hash_free_key16_ext(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key16_ext( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_16 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (bucket = bucket0; bucket != NULL; bucket = bucket->next) + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; + bucket_prev = bucket, bucket = bucket->next) + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if (bucket_signature == 0) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature[i] = signature; + memcpy(bucket_key, key, f->key_size); + memcpy(bucket_data, entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + + /* Bucket full: extend bucket */ + if (f->stack_pos > 0) { + bucket_index = f->stack[--f->stack_pos]; + + bucket = (struct rte_bucket_4_16 *) &f->memory[(f->n_buckets + + bucket_index) * f->bucket_size]; + bucket_prev->next = bucket; + bucket_prev->next_valid = 1; + + bucket->signature[0] = signature; + memcpy(bucket->key[0], key, f->key_size); + memcpy(&bucket->data[0], entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[0]; + return 0; + } + + return -ENOSPC; +} + +static int +rte_table_hash_entry_delete_key16_ext( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_16 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; + bucket_prev = bucket, bucket = bucket->next) + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature[i] = 0; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, + f->entry_size); + + if ((bucket->signature[0] == 0) && + (bucket->signature[1] == 0) && + (bucket->signature[2] == 0) && + (bucket->signature[3] == 0) && + (bucket_prev != NULL)) { + bucket_prev->next = bucket->next; + bucket_prev->next_valid = + bucket->next_valid; + + memset(bucket, 0, + sizeof(struct rte_bucket_4_16)); + bucket_index = (((uint8_t *)bucket - + (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets; + f->stack[f->stack_pos++] = bucket_index; + } + + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +#define lookup_key16_cmp(key_in, bucket, pos) \ +{ \ + uint64_t xor[4][2], or[4], signature[4]; \ + \ + signature[0] = (~bucket->signature[0]) & 1; \ + signature[1] = (~bucket->signature[1]) & 1; \ + signature[2] = (~bucket->signature[2]) & 1; \ + signature[3] = (~bucket->signature[3]) & 1; \ + \ + xor[0][0] = key_in[0] ^ bucket->key[0][0]; \ + xor[0][1] = key_in[1] ^ bucket->key[0][1]; \ + \ + xor[1][0] = key_in[0] ^ bucket->key[1][0]; \ + xor[1][1] = key_in[1] ^ bucket->key[1][1]; \ + \ + xor[2][0] = key_in[0] ^ bucket->key[2][0]; \ + xor[2][1] = key_in[1] ^ bucket->key[2][1]; \ + \ + xor[3][0] = key_in[0] ^ bucket->key[3][0]; \ + xor[3][1] = key_in[1] ^ bucket->key[3][1]; \ + \ + or[0] = xor[0][0] | xor[0][1] | signature[0]; \ + or[1] = xor[1][0] | xor[1][1] | signature[1]; \ + or[2] = xor[2][0] | xor[2][1] | signature[2]; \ + or[3] = xor[3][0] | xor[3][1] | signature[3]; \ + \ + pos = 4; \ + if (or[0] == 0) \ + pos = 0; \ + if (or[1] == 0) \ + pos = 1; \ + if (or[2] == 0) \ + pos = 2; \ + if (or[3] == 0) \ + pos = 3; \ +} + +#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt_mask; \ + uint32_t key_offset = f->key_offset;\ + \ + pkt0_index = __builtin_ctzll(pkts_mask); \ + pkt_mask = 1LLU << pkt0_index; \ + pkts_mask &= ~pkt_mask; \ + \ + mbuf0 = pkts[pkt0_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\ +} + +#define lookup1_stage1(mbuf1, bucket1, f) \ +{ \ + uint64_t signature; \ + uint32_t bucket_index; \ + \ + signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ + rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ +} + +#define lookup1_stage1_dosig(mbuf1, bucket1, f) \ +{ \ + uint64_t *key; \ + uint64_t signature = 0; \ + uint32_t bucket_index; \ + uint64_t hash_key_buffer[2]; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\ + \ + hash_key_buffer[0] = key[0] & f->key_mask[0]; \ + hash_key_buffer[1] = key[1] & f->key_mask[1]; \ + signature = f->f_hash(hash_key_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed); \ + \ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ + rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ +} + +#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \ + pkts_mask_out, entries, f) \ +{ \ + void *a; \ + uint64_t pkt_mask; \ + uint64_t *key; \ + uint64_t hash_key_buffer[2]; \ + uint32_t pos; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + hash_key_buffer[0] = key[0] & f->key_mask[0]; \ + hash_key_buffer[1] = key[1] & f->key_mask[1]; \ + \ + lookup_key16_cmp(hash_key_buffer, bucket2, pos); \ + \ + pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + lru_update(bucket2, pos); \ +} + +#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out, entries, \ + buckets_mask, buckets, keys, f) \ +{ \ + struct rte_bucket_4_16 *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint64_t hash_key_buffer[2]; \ + uint32_t pos; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + hash_key_buffer[0] = key[0] & f->key_mask[0]; \ + hash_key_buffer[1] = key[1] & f->key_mask[1]; \ + \ + lookup_key16_cmp(hash_key_buffer, bucket2, pos); \ + \ + pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket2->next; \ + buckets[pkt2_index] = bucket_next; \ + keys[pkt2_index] = key; \ +} + +#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\ + buckets_mask, f) \ +{ \ + struct rte_bucket_4_16 *bucket, *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint64_t hash_key_buffer[2]; \ + uint32_t pos; \ + \ + bucket = buckets[pkt_index]; \ + key = keys[pkt_index]; \ + hash_key_buffer[0] = key[0] & f->key_mask[0]; \ + hash_key_buffer[1] = key[1] & f->key_mask[1]; \ + \ + lookup_key16_cmp(hash_key_buffer, bucket, pos); \ + \ + pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket->next; \ + rte_prefetch0(bucket_next); \ + rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\ + buckets[pkt_index] = bucket_next; \ + keys[pkt_index] = key; \ +} + +#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\ + pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\ + mbuf00, mbuf01, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset)); \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + if (pkts_mask == 0) \ + pkt01_index = pkt00_index; \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset)); \ +} + +#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ +{ \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + \ + signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ + \ + signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ + rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ +} + +#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f) \ +{ \ + uint64_t *key10, *key11; \ + uint64_t hash_offset_buffer[2]; \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + \ + key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, f->key_offset);\ + hash_offset_buffer[0] = key10[0] & f->key_mask[0]; \ + hash_offset_buffer[1] = key10[1] & f->key_mask[1]; \ + signature10 = f->f_hash(hash_offset_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ + \ + key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, f->key_offset);\ + hash_offset_buffer[0] = key11[0] & f->key_mask[0]; \ + hash_offset_buffer[1] = key11[1] & f->key_mask[1]; \ + signature11 = f->f_hash(hash_offset_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed);\ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_16 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ + rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ +} + +#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\ + bucket20, bucket21, pkts_mask_out, entries, f) \ +{ \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask; \ + uint64_t *key20, *key21; \ + uint64_t hash_key_buffer20[2]; \ + uint64_t hash_key_buffer21[2]; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \ + hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \ + hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \ + hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \ + \ + lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \ + lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \ + \ + pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ + pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + lru_update(bucket20, pos20); \ + lru_update(bucket21, pos21); \ +} + +#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \ + bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f) \ +{ \ + struct rte_bucket_4_16 *bucket20_next, *bucket21_next; \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\ + uint64_t *key20, *key21; \ + uint64_t hash_key_buffer20[2]; \ + uint64_t hash_key_buffer21[2]; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + hash_key_buffer20[0] = key20[0] & f->key_mask[0]; \ + hash_key_buffer20[1] = key20[1] & f->key_mask[1]; \ + hash_key_buffer21[0] = key21[0] & f->key_mask[0]; \ + hash_key_buffer21[1] = key21[1] & f->key_mask[1]; \ + \ + lookup_key16_cmp(hash_key_buffer20, bucket20, pos20); \ + lookup_key16_cmp(hash_key_buffer21, bucket21, pos21); \ + \ + pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ + pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + \ + bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\ + bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\ + buckets_mask |= bucket20_mask | bucket21_mask; \ + bucket20_next = bucket20->next; \ + bucket21_next = bucket21->next; \ + buckets[pkt20_index] = bucket20_next; \ + buckets[pkt21_index] = bucket21_next; \ + keys[pkt20_index] = key20; \ + keys[pkt21_index] = key21; \ +} + +static int +rte_table_hash_lookup_key16_lru( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, + n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_lru() */ + +static int +rte_table_hash_lookup_key16_lru_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_lru_dosig() */ + +static int +rte_table_hash_lookup_key16_ext( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, + buckets, keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_ext() */ + +static int +rte_table_hash_lookup_key16_ext_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_16 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_16 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_HASH_KEY16_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_16 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, + buckets, keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY16_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - + __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key16_ext_dosig() */ + +static int +rte_table_hash_key16_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_key16_lru_ops = { + .f_create = rte_table_hash_create_key16_lru, + .f_free = rte_table_hash_free_key16_lru, + .f_add = rte_table_hash_entry_add_key16_lru, + .f_delete = rte_table_hash_entry_delete_key16_lru, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key16_lru, + .f_stats = rte_table_hash_key16_stats_read, +}; + +struct rte_table_ops rte_table_hash_key16_lru_dosig_ops = { + .f_create = rte_table_hash_create_key16_lru, + .f_free = rte_table_hash_free_key16_lru, + .f_add = rte_table_hash_entry_add_key16_lru, + .f_delete = rte_table_hash_entry_delete_key16_lru, + .f_lookup = rte_table_hash_lookup_key16_lru_dosig, + .f_stats = rte_table_hash_key16_stats_read, +}; + +struct rte_table_ops rte_table_hash_key16_ext_ops = { + .f_create = rte_table_hash_create_key16_ext, + .f_free = rte_table_hash_free_key16_ext, + .f_add = rte_table_hash_entry_add_key16_ext, + .f_delete = rte_table_hash_entry_delete_key16_ext, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key16_ext, + .f_stats = rte_table_hash_key16_stats_read, +}; + +struct rte_table_ops rte_table_hash_key16_ext_dosig_ops = { + .f_create = rte_table_hash_create_key16_ext, + .f_free = rte_table_hash_free_key16_ext, + .f_add = rte_table_hash_entry_add_key16_ext, + .f_delete = rte_table_hash_entry_delete_key16_ext, + .f_lookup = rte_table_hash_lookup_key16_ext_dosig, + .f_stats = rte_table_hash_key16_stats_read, +}; diff --git a/lib/librte_table/rte_table_hash_key32.c b/lib/librte_table/rte_table_hash_key32.c new file mode 100644 index 00000000..a7aba492 --- /dev/null +++ b/lib/librte_table/rte_table_hash_key32.c @@ -0,0 +1,1144 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_hash.h" +#include "rte_lru.h" + +#define RTE_TABLE_HASH_KEY_SIZE 32 + +#define RTE_BUCKET_ENTRY_VALID 0x1LLU + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_bucket_4_32 { + /* Cache line 0 */ + uint64_t signature[4 + 1]; + uint64_t lru_list; + struct rte_bucket_4_32 *next; + uint64_t next_valid; + + /* Cache lines 1 and 2 */ + uint64_t key[4][4]; + + /* Cache line 3 */ + uint8_t data[0]; +}; + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t n_buckets; + uint32_t n_entries_per_bucket; + uint32_t key_size; + uint32_t entry_size; + uint32_t bucket_size; + uint32_t signature_offset; + uint32_t key_offset; + rte_table_hash_op_hash f_hash; + uint64_t seed; + + /* Extendible buckets */ + uint32_t n_buckets_ext; + uint32_t stack_pos; + uint32_t *stack; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; +}; + +static int +check_params_create_lru(struct rte_table_hash_key32_lru_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", + __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key32_lru(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_table_hash_key32_lru_params *p = + (struct rte_table_hash_key32_lru_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl; + uint32_t total_size, i; + + /* Check input parameters */ + if ((check_params_create_lru(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) { + return NULL; + } + n_entries_per_bucket = 4; + key_size = 32; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket + * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + n_buckets * + bucket_size_cl * RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", __func__, + total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + for (i = 0; i < n_buckets; i++) { + struct rte_bucket_4_32 *bucket; + + bucket = (struct rte_bucket_4_32 *) &f->memory[i * + f->bucket_size]; + bucket->lru_list = 0x0000000100020003LLU; + } + + return f; +} + +static int +rte_table_hash_free_key32_lru(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key32_lru( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket; + uint64_t signature, pos; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_32 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if (bucket_signature == 0) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature[i] = signature; + memcpy(bucket_key, key, f->key_size); + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + + /* Bucket full: replace LRU entry */ + pos = lru_pos(bucket); + bucket->signature[pos] = signature; + memcpy(bucket->key[pos], key, f->key_size); + memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); + lru_update(bucket, pos); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[pos * f->entry_size]; + + return 0; +} + +static int +rte_table_hash_entry_delete_key32_lru( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_32 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature[i] = 0; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, f->entry_size); + + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +static int +check_params_create_ext(struct rte_table_hash_key32_ext_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* n_entries_ext */ + if (params->n_entries_ext == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", + __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key32_ext(void *params, + int socket_id, + uint32_t entry_size) +{ + struct rte_table_hash_key32_ext_params *p = + (struct rte_table_hash_key32_ext_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket; + uint32_t key_size, bucket_size_cl, stack_size_cl, total_size, i; + + /* Check input parameters */ + if ((check_params_create_ext(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_32) % RTE_CACHE_LINE_SIZE) != 0)) + return NULL; + + n_entries_per_bucket = 4; + key_size = 32; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / + n_entries_per_bucket; + bucket_size_cl = (sizeof(struct rte_bucket_4_32) + n_entries_per_bucket + * entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) + / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + + ((n_buckets + n_buckets_ext) * bucket_size_cl + stack_size_cl) * + RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", __func__, + total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + f->n_buckets_ext = n_buckets_ext; + f->stack_pos = n_buckets_ext; + f->stack = (uint32_t *) + &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; + + for (i = 0; i < n_buckets_ext; i++) + f->stack[i] = i; + + return f; +} + +static int +rte_table_hash_free_key32_ext(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key32_ext( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_32 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (bucket = bucket0; bucket != NULL; bucket = bucket->next) { + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + } + + /* Key is not present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; + bucket_prev = bucket, bucket = bucket->next) + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if (bucket_signature == 0) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature[i] = signature; + memcpy(bucket_key, key, f->key_size); + memcpy(bucket_data, entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + + /* Bucket full: extend bucket */ + if (f->stack_pos > 0) { + bucket_index = f->stack[--f->stack_pos]; + + bucket = (struct rte_bucket_4_32 *) + &f->memory[(f->n_buckets + bucket_index) * + f->bucket_size]; + bucket_prev->next = bucket; + bucket_prev->next_valid = 1; + + bucket->signature[0] = signature; + memcpy(bucket->key[0], key, f->key_size); + memcpy(&bucket->data[0], entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[0]; + return 0; + } + + return -ENOSPC; +} + +static int +rte_table_hash_entry_delete_key32_ext( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_32 *) + &f->memory[bucket_index * f->bucket_size]; + signature |= RTE_BUCKET_ENTRY_VALID; + + /* Key is present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; + bucket_prev = bucket, bucket = bucket->next) + for (i = 0; i < 4; i++) { + uint64_t bucket_signature = bucket->signature[i]; + uint8_t *bucket_key = (uint8_t *) bucket->key[i]; + + if ((bucket_signature == signature) && + (memcmp(key, bucket_key, f->key_size) == 0)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature[i] = 0; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, + f->entry_size); + + if ((bucket->signature[0] == 0) && + (bucket->signature[1] == 0) && + (bucket->signature[2] == 0) && + (bucket->signature[3] == 0) && + (bucket_prev != NULL)) { + bucket_prev->next = bucket->next; + bucket_prev->next_valid = + bucket->next_valid; + + memset(bucket, 0, + sizeof(struct rte_bucket_4_32)); + bucket_index = (((uint8_t *)bucket - + (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets; + f->stack[f->stack_pos++] = bucket_index; + } + + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +#define lookup_key32_cmp(key_in, bucket, pos) \ +{ \ + uint64_t xor[4][4], or[4], signature[4]; \ + \ + signature[0] = ((~bucket->signature[0]) & 1); \ + signature[1] = ((~bucket->signature[1]) & 1); \ + signature[2] = ((~bucket->signature[2]) & 1); \ + signature[3] = ((~bucket->signature[3]) & 1); \ + \ + xor[0][0] = key_in[0] ^ bucket->key[0][0]; \ + xor[0][1] = key_in[1] ^ bucket->key[0][1]; \ + xor[0][2] = key_in[2] ^ bucket->key[0][2]; \ + xor[0][3] = key_in[3] ^ bucket->key[0][3]; \ + \ + xor[1][0] = key_in[0] ^ bucket->key[1][0]; \ + xor[1][1] = key_in[1] ^ bucket->key[1][1]; \ + xor[1][2] = key_in[2] ^ bucket->key[1][2]; \ + xor[1][3] = key_in[3] ^ bucket->key[1][3]; \ + \ + xor[2][0] = key_in[0] ^ bucket->key[2][0]; \ + xor[2][1] = key_in[1] ^ bucket->key[2][1]; \ + xor[2][2] = key_in[2] ^ bucket->key[2][2]; \ + xor[2][3] = key_in[3] ^ bucket->key[2][3]; \ + \ + xor[3][0] = key_in[0] ^ bucket->key[3][0]; \ + xor[3][1] = key_in[1] ^ bucket->key[3][1]; \ + xor[3][2] = key_in[2] ^ bucket->key[3][2]; \ + xor[3][3] = key_in[3] ^ bucket->key[3][3]; \ + \ + or[0] = xor[0][0] | xor[0][1] | xor[0][2] | xor[0][3] | signature[0];\ + or[1] = xor[1][0] | xor[1][1] | xor[1][2] | xor[1][3] | signature[1];\ + or[2] = xor[2][0] | xor[2][1] | xor[2][2] | xor[2][3] | signature[2];\ + or[3] = xor[3][0] | xor[3][1] | xor[3][2] | xor[3][3] | signature[3];\ + \ + pos = 4; \ + if (or[0] == 0) \ + pos = 0; \ + if (or[1] == 0) \ + pos = 1; \ + if (or[2] == 0) \ + pos = 2; \ + if (or[3] == 0) \ + pos = 3; \ +} + +#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt0_index = __builtin_ctzll(pkts_mask); \ + pkt_mask = 1LLU << pkt0_index; \ + pkts_mask &= ~pkt_mask; \ + \ + mbuf0 = pkts[pkt0_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset));\ +} + +#define lookup1_stage1(mbuf1, bucket1, f) \ +{ \ + uint64_t signature; \ + uint32_t bucket_index; \ + \ + signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_32 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ + rte_prefetch0((void *)(((uintptr_t) bucket1) + RTE_CACHE_LINE_SIZE));\ + rte_prefetch0((void *)(((uintptr_t) bucket1) + 2 * RTE_CACHE_LINE_SIZE));\ +} + +#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \ + pkts_mask_out, entries, f) \ +{ \ + void *a; \ + uint64_t pkt_mask; \ + uint64_t *key; \ + uint32_t pos; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + \ + lookup_key32_cmp(key, bucket2, pos); \ + \ + pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + lru_update(bucket2, pos); \ +} + +#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\ + entries, buckets_mask, buckets, keys, f) \ +{ \ + struct rte_bucket_4_32 *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint32_t pos; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + \ + lookup_key32_cmp(key, bucket2, pos); \ + \ + pkt_mask = (bucket2->signature[pos] & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket2->next; \ + buckets[pkt2_index] = bucket_next; \ + keys[pkt2_index] = key; \ +} + +#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, \ + entries, buckets_mask, f) \ +{ \ + struct rte_bucket_4_32 *bucket, *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint32_t pos; \ + \ + bucket = buckets[pkt_index]; \ + key = keys[pkt_index]; \ + \ + lookup_key32_cmp(key, bucket, pos); \ + \ + pkt_mask = (bucket->signature[pos] & 1LLU) << pkt_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket->next; \ + rte_prefetch0(bucket_next); \ + rte_prefetch0((void *)(((uintptr_t) bucket_next) + RTE_CACHE_LINE_SIZE));\ + rte_prefetch0((void *)(((uintptr_t) bucket_next) + \ + 2 * RTE_CACHE_LINE_SIZE)); \ + buckets[pkt_index] = bucket_next; \ + keys[pkt_index] = key; \ +} + +#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\ + pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\ + mbuf00, mbuf01, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset)); \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + if (pkts_mask == 0) \ + pkt01_index = pkt00_index; \ + \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset)); \ +} + +#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ +{ \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + \ + signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_32 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + rte_prefetch0((void *)(((uintptr_t) bucket10) + RTE_CACHE_LINE_SIZE));\ + rte_prefetch0((void *)(((uintptr_t) bucket10) + 2 * RTE_CACHE_LINE_SIZE));\ + \ + signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_32 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ + rte_prefetch0((void *)(((uintptr_t) bucket11) + RTE_CACHE_LINE_SIZE));\ + rte_prefetch0((void *)(((uintptr_t) bucket11) + 2 * RTE_CACHE_LINE_SIZE));\ +} + +#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\ + bucket20, bucket21, pkts_mask_out, entries, f) \ +{ \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask; \ + uint64_t *key20, *key21; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + \ + lookup_key32_cmp(key20, bucket20, pos20); \ + lookup_key32_cmp(key21, bucket21, pos21); \ + \ + pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ + pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + lru_update(bucket20, pos20); \ + lru_update(bucket21, pos21); \ +} + +#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \ + bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\ +{ \ + struct rte_bucket_4_32 *bucket20_next, *bucket21_next; \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\ + uint64_t *key20, *key21; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + \ + lookup_key32_cmp(key20, bucket20, pos20); \ + lookup_key32_cmp(key21, bucket21, pos21); \ + \ + pkt20_mask = (bucket20->signature[pos20] & 1LLU) << pkt20_index;\ + pkt21_mask = (bucket21->signature[pos21] & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + \ + bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\ + bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\ + buckets_mask |= bucket20_mask | bucket21_mask; \ + bucket20_next = bucket20->next; \ + bucket21_next = bucket21->next; \ + buckets[pkt20_index] = bucket20_next; \ + buckets[pkt21_index] = bucket21_next; \ + keys[pkt20_index] = key20; \ + keys[pkt21_index] = key21; \ +} + +static int +rte_table_hash_lookup_key32_lru( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_32 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, + mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, + entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, + mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, + mbuf20, mbuf21, bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key32_lru() */ + +static int +rte_table_hash_lookup_key32_ext( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_32 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_32 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY32_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_32 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, buckets, + keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY32_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key32_ext() */ + +static int +rte_table_hash_key32_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_key32_lru_ops = { + .f_create = rte_table_hash_create_key32_lru, + .f_free = rte_table_hash_free_key32_lru, + .f_add = rte_table_hash_entry_add_key32_lru, + .f_delete = rte_table_hash_entry_delete_key32_lru, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key32_lru, + .f_stats = rte_table_hash_key32_stats_read, +}; + +struct rte_table_ops rte_table_hash_key32_ext_ops = { + .f_create = rte_table_hash_create_key32_ext, + .f_free = rte_table_hash_free_key32_ext, + .f_add = rte_table_hash_entry_add_key32_ext, + .f_delete = rte_table_hash_entry_delete_key32_ext, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key32_ext, + .f_stats = rte_table_hash_key32_stats_read, +}; diff --git a/lib/librte_table/rte_table_hash_key8.c b/lib/librte_table/rte_table_hash_key8.c new file mode 100644 index 00000000..e2e2bdc4 --- /dev/null +++ b/lib/librte_table/rte_table_hash_key8.c @@ -0,0 +1,1471 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_hash.h" +#include "rte_lru.h" + +#define RTE_TABLE_HASH_KEY_SIZE 8 + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_bucket_4_8 { + /* Cache line 0 */ + uint64_t signature; + uint64_t lru_list; + struct rte_bucket_4_8 *next; + uint64_t next_valid; + + uint64_t key[4]; + + /* Cache line 1 */ + uint8_t data[0]; +}; + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t n_buckets; + uint32_t n_entries_per_bucket; + uint32_t key_size; + uint32_t entry_size; + uint32_t bucket_size; + uint32_t signature_offset; + uint32_t key_offset; + uint64_t key_mask; + rte_table_hash_op_hash f_hash; + uint64_t seed; + + /* Extendible buckets */ + uint32_t n_buckets_ext; + uint32_t stack_pos; + uint32_t *stack; + + /* Lookup table */ + uint8_t memory[0] __rte_cache_aligned; +}; + +static int +check_params_create_lru(struct rte_table_hash_key8_lru_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", + __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key8_lru(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_hash_key8_lru_params *p = + (struct rte_table_hash_key8_lru_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_entries_per_bucket, key_size, bucket_size_cl; + uint32_t total_size, i; + + /* Check input parameters */ + if ((check_params_create_lru(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) { + return NULL; + } + n_entries_per_bucket = 4; + key_size = 8; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket * + entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + n_buckets * + bucket_size_cl * RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", + __func__, total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + if (p->key_mask != NULL) + f->key_mask = ((uint64_t *)p->key_mask)[0]; + else + f->key_mask = 0xFFFFFFFFFFFFFFFFLLU; + + for (i = 0; i < n_buckets; i++) { + struct rte_bucket_4_8 *bucket; + + bucket = (struct rte_bucket_4_8 *) &f->memory[i * + f->bucket_size]; + bucket->lru_list = 0x0000000100020003LLU; + } + + return f; +} + +static int +rte_table_hash_free_key8_lru(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key8_lru( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket; + uint64_t signature, mask, pos; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_8 *) + &f->memory[bucket_index * f->bucket_size]; + + /* Key is present in the bucket */ + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + uint64_t bucket_key = bucket->key[i]; + + if ((bucket_signature & mask) && + (*((uint64_t *) key) == bucket_key)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + + if ((bucket_signature & mask) == 0) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature |= mask; + bucket->key[i] = *((uint64_t *) key); + memcpy(bucket_data, entry, f->entry_size); + lru_update(bucket, i); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + + /* Bucket full: replace LRU entry */ + pos = lru_pos(bucket); + bucket->key[pos] = *((uint64_t *) key); + memcpy(&bucket->data[pos * f->entry_size], entry, f->entry_size); + lru_update(bucket, pos); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[pos * f->entry_size]; + + return 0; +} + +static int +rte_table_hash_entry_delete_key8_lru( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket; + uint64_t signature, mask; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket = (struct rte_bucket_4_8 *) + &f->memory[bucket_index * f->bucket_size]; + + /* Key is present in the bucket */ + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + uint64_t bucket_key = bucket->key[i]; + + if ((bucket_signature & mask) && + (*((uint64_t *) key) == bucket_key)) { + uint8_t *bucket_data = &bucket->data[i * f->entry_size]; + + bucket->signature &= ~mask; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, f->entry_size); + + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +static int +check_params_create_ext(struct rte_table_hash_key8_ext_params *params) { + /* n_entries */ + if (params->n_entries == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries is zero\n", __func__); + return -EINVAL; + } + + /* n_entries_ext */ + if (params->n_entries_ext == 0) { + RTE_LOG(ERR, TABLE, "%s: n_entries_ext is zero\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash function pointer is NULL\n", + __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_create_key8_ext(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_hash_key8_ext_params *p = + (struct rte_table_hash_key8_ext_params *) params; + struct rte_table_hash *f; + uint32_t n_buckets, n_buckets_ext, n_entries_per_bucket, key_size; + uint32_t bucket_size_cl, stack_size_cl, total_size, i; + + /* Check input parameters */ + if ((check_params_create_ext(p) != 0) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + ((sizeof(struct rte_bucket_4_8) % RTE_CACHE_LINE_SIZE) != 0)) + return NULL; + + n_entries_per_bucket = 4; + key_size = 8; + + /* Memory allocation */ + n_buckets = rte_align32pow2((p->n_entries + n_entries_per_bucket - 1) / + n_entries_per_bucket); + n_buckets_ext = (p->n_entries_ext + n_entries_per_bucket - 1) / + n_entries_per_bucket; + bucket_size_cl = (sizeof(struct rte_bucket_4_8) + n_entries_per_bucket * + entry_size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE; + stack_size_cl = (n_buckets_ext * sizeof(uint32_t) + RTE_CACHE_LINE_SIZE - 1) + / RTE_CACHE_LINE_SIZE; + total_size = sizeof(struct rte_table_hash) + ((n_buckets + + n_buckets_ext) * bucket_size_cl + stack_size_cl) * + RTE_CACHE_LINE_SIZE; + + f = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (f == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, + "%s: Hash table memory footprint is %u bytes\n", + __func__, total_size); + + /* Memory initialization */ + f->n_buckets = n_buckets; + f->n_entries_per_bucket = n_entries_per_bucket; + f->key_size = key_size; + f->entry_size = entry_size; + f->bucket_size = bucket_size_cl * RTE_CACHE_LINE_SIZE; + f->signature_offset = p->signature_offset; + f->key_offset = p->key_offset; + f->f_hash = p->f_hash; + f->seed = p->seed; + + f->n_buckets_ext = n_buckets_ext; + f->stack_pos = n_buckets_ext; + f->stack = (uint32_t *) + &f->memory[(n_buckets + n_buckets_ext) * f->bucket_size]; + + if (p->key_mask != NULL) + f->key_mask = ((uint64_t *)p->key_mask)[0]; + else + f->key_mask = 0xFFFFFFFFFFFFFFFFLLU; + + for (i = 0; i < n_buckets_ext; i++) + f->stack[i] = i; + + return f; +} + +static int +rte_table_hash_free_key8_ext(void *table) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (f == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + rte_free(f); + return 0; +} + +static int +rte_table_hash_entry_add_key8_ext( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_8 *) + &f->memory[bucket_index * f->bucket_size]; + + /* Key is present in the bucket */ + for (bucket = bucket0; bucket != NULL; bucket = bucket->next) { + uint64_t mask; + + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + uint64_t bucket_key = bucket->key[i]; + + if ((bucket_signature & mask) && + (*((uint64_t *) key) == bucket_key)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + memcpy(bucket_data, entry, f->entry_size); + *key_found = 1; + *entry_ptr = (void *) bucket_data; + return 0; + } + } + } + + /* Key is not present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; + bucket != NULL; bucket_prev = bucket, bucket = bucket->next) { + uint64_t mask; + + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + + if ((bucket_signature & mask) == 0) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature |= mask; + bucket->key[i] = *((uint64_t *) key); + memcpy(bucket_data, entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) bucket_data; + + return 0; + } + } + } + + /* Bucket full: extend bucket */ + if (f->stack_pos > 0) { + bucket_index = f->stack[--f->stack_pos]; + + bucket = (struct rte_bucket_4_8 *) &f->memory[(f->n_buckets + + bucket_index) * f->bucket_size]; + bucket_prev->next = bucket; + bucket_prev->next_valid = 1; + + bucket->signature = 1; + bucket->key[0] = *((uint64_t *) key); + memcpy(&bucket->data[0], entry, f->entry_size); + *key_found = 0; + *entry_ptr = (void *) &bucket->data[0]; + return 0; + } + + return -ENOSPC; +} + +static int +rte_table_hash_entry_delete_key8_ext( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket0, *bucket, *bucket_prev; + uint64_t signature; + uint32_t bucket_index, i; + + signature = f->f_hash(key, f->key_size, f->seed); + bucket_index = signature & (f->n_buckets - 1); + bucket0 = (struct rte_bucket_4_8 *) + &f->memory[bucket_index * f->bucket_size]; + + /* Key is present in the bucket */ + for (bucket_prev = NULL, bucket = bucket0; bucket != NULL; + bucket_prev = bucket, bucket = bucket->next) { + uint64_t mask; + + for (i = 0, mask = 1LLU; i < 4; i++, mask <<= 1) { + uint64_t bucket_signature = bucket->signature; + uint64_t bucket_key = bucket->key[i]; + + if ((bucket_signature & mask) && + (*((uint64_t *) key) == bucket_key)) { + uint8_t *bucket_data = &bucket->data[i * + f->entry_size]; + + bucket->signature &= ~mask; + *key_found = 1; + if (entry) + memcpy(entry, bucket_data, + f->entry_size); + + if ((bucket->signature == 0) && + (bucket_prev != NULL)) { + bucket_prev->next = bucket->next; + bucket_prev->next_valid = + bucket->next_valid; + + memset(bucket, 0, + sizeof(struct rte_bucket_4_8)); + bucket_index = (((uint8_t *)bucket - + (uint8_t *)f->memory)/f->bucket_size) - f->n_buckets; + f->stack[f->stack_pos++] = bucket_index; + } + + return 0; + } + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +#define lookup_key8_cmp(key_in, bucket, pos) \ +{ \ + uint64_t xor[4], signature; \ + \ + signature = ~bucket->signature; \ + \ + xor[0] = (key_in[0] ^ bucket->key[0]) | (signature & 1);\ + xor[1] = (key_in[0] ^ bucket->key[1]) | (signature & 2);\ + xor[2] = (key_in[0] ^ bucket->key[2]) | (signature & 4);\ + xor[3] = (key_in[0] ^ bucket->key[3]) | (signature & 8);\ + \ + pos = 4; \ + if (xor[0] == 0) \ + pos = 0; \ + if (xor[1] == 0) \ + pos = 1; \ + if (xor[2] == 0) \ + pos = 2; \ + if (xor[3] == 0) \ + pos = 3; \ +} + +#define lookup1_stage0(pkt0_index, mbuf0, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt_mask; \ + uint32_t key_offset = f->key_offset;\ + \ + pkt0_index = __builtin_ctzll(pkts_mask); \ + pkt_mask = 1LLU << pkt0_index; \ + pkts_mask &= ~pkt_mask; \ + \ + mbuf0 = pkts[pkt0_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf0, key_offset)); \ +} + +#define lookup1_stage1(mbuf1, bucket1, f) \ +{ \ + uint64_t signature; \ + uint32_t bucket_index; \ + \ + signature = RTE_MBUF_METADATA_UINT32(mbuf1, f->signature_offset);\ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ +} + +#define lookup1_stage1_dosig(mbuf1, bucket1, f) \ +{ \ + uint64_t *key; \ + uint64_t signature; \ + uint32_t bucket_index; \ + uint64_t hash_key_buffer; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf1, f->key_offset);\ + hash_key_buffer = *key & f->key_mask; \ + signature = f->f_hash(&hash_key_buffer, \ + RTE_TABLE_HASH_KEY_SIZE, f->seed); \ + bucket_index = signature & (f->n_buckets - 1); \ + bucket1 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket_index * f->bucket_size]; \ + rte_prefetch0(bucket1); \ +} + +#define lookup1_stage2_lru(pkt2_index, mbuf2, bucket2, \ + pkts_mask_out, entries, f) \ +{ \ + void *a; \ + uint64_t pkt_mask; \ + uint64_t *key; \ + uint32_t pos; \ + uint64_t hash_key_buffer; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + hash_key_buffer = key[0] & f->key_mask; \ + \ + lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \ + \ + pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + lru_update(bucket2, pos); \ +} + +#define lookup1_stage2_ext(pkt2_index, mbuf2, bucket2, pkts_mask_out,\ + entries, buckets_mask, buckets, keys, f) \ +{ \ + struct rte_bucket_4_8 *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint32_t pos; \ + uint64_t hash_key_buffer; \ + \ + key = RTE_MBUF_METADATA_UINT64_PTR(mbuf2, f->key_offset);\ + hash_key_buffer = *key & f->key_mask; \ + \ + lookup_key8_cmp((&hash_key_buffer), bucket2, pos); \ + \ + pkt_mask = ((bucket2->signature >> pos) & 1LLU) << pkt2_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket2->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt2_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket2->next_valid << pkt2_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket2->next; \ + buckets[pkt2_index] = bucket_next; \ + keys[pkt2_index] = key; \ +} + +#define lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, entries,\ + buckets_mask, f) \ +{ \ + struct rte_bucket_4_8 *bucket, *bucket_next; \ + void *a; \ + uint64_t pkt_mask, bucket_mask; \ + uint64_t *key; \ + uint32_t pos; \ + uint64_t hash_key_buffer; \ + \ + bucket = buckets[pkt_index]; \ + key = keys[pkt_index]; \ + hash_key_buffer = (*key) & f->key_mask; \ + \ + lookup_key8_cmp((&hash_key_buffer), bucket, pos); \ + \ + pkt_mask = ((bucket->signature >> pos) & 1LLU) << pkt_index;\ + pkts_mask_out |= pkt_mask; \ + \ + a = (void *) &bucket->data[pos * f->entry_size]; \ + rte_prefetch0(a); \ + entries[pkt_index] = a; \ + \ + bucket_mask = (~pkt_mask) & (bucket->next_valid << pkt_index);\ + buckets_mask |= bucket_mask; \ + bucket_next = bucket->next; \ + rte_prefetch0(bucket_next); \ + buckets[pkt_index] = bucket_next; \ + keys[pkt_index] = key; \ +} + +#define lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01,\ + pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage0_with_odd_support(pkt00_index, pkt01_index,\ + mbuf00, mbuf01, pkts, pkts_mask, f) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + uint32_t key_offset = f->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + \ + mbuf00 = pkts[pkt00_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + if (pkts_mask == 0) \ + pkt01_index = pkt00_index; \ + \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + \ + mbuf01 = pkts[pkt01_index]; \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f) \ +{ \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + \ + signature10 = RTE_MBUF_METADATA_UINT32(mbuf10, f->signature_offset);\ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + \ + signature11 = RTE_MBUF_METADATA_UINT32(mbuf11, f->signature_offset);\ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ +} + +#define lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f)\ +{ \ + uint64_t *key10, *key11; \ + uint64_t hash_offset_buffer10; \ + uint64_t hash_offset_buffer11; \ + uint64_t signature10, signature11; \ + uint32_t bucket10_index, bucket11_index; \ + rte_table_hash_op_hash f_hash = f->f_hash; \ + uint64_t seed = f->seed; \ + uint32_t key_offset = f->key_offset; \ + \ + key10 = RTE_MBUF_METADATA_UINT64_PTR(mbuf10, key_offset);\ + key11 = RTE_MBUF_METADATA_UINT64_PTR(mbuf11, key_offset);\ + hash_offset_buffer10 = *key10 & f->key_mask; \ + hash_offset_buffer11 = *key11 & f->key_mask; \ + \ + signature10 = f_hash(&hash_offset_buffer10, \ + RTE_TABLE_HASH_KEY_SIZE, seed); \ + bucket10_index = signature10 & (f->n_buckets - 1); \ + bucket10 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket10_index * f->bucket_size]; \ + rte_prefetch0(bucket10); \ + \ + signature11 = f_hash(&hash_offset_buffer11, \ + RTE_TABLE_HASH_KEY_SIZE, seed); \ + bucket11_index = signature11 & (f->n_buckets - 1); \ + bucket11 = (struct rte_bucket_4_8 *) \ + &f->memory[bucket11_index * f->bucket_size]; \ + rte_prefetch0(bucket11); \ +} + +#define lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21,\ + bucket20, bucket21, pkts_mask_out, entries, f) \ +{ \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask; \ + uint64_t *key20, *key21; \ + uint64_t hash_offset_buffer20; \ + uint64_t hash_offset_buffer21; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + hash_offset_buffer20 = *key20 & f->key_mask; \ + hash_offset_buffer21 = *key21 & f->key_mask; \ + \ + lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\ + lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\ + \ + pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\ + pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + lru_update(bucket20, pos20); \ + lru_update(bucket21, pos21); \ +} + +#define lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, bucket20, \ + bucket21, pkts_mask_out, entries, buckets_mask, buckets, keys, f)\ +{ \ + struct rte_bucket_4_8 *bucket20_next, *bucket21_next; \ + void *a20, *a21; \ + uint64_t pkt20_mask, pkt21_mask, bucket20_mask, bucket21_mask;\ + uint64_t *key20, *key21; \ + uint64_t hash_offset_buffer20; \ + uint64_t hash_offset_buffer21; \ + uint32_t pos20, pos21; \ + \ + key20 = RTE_MBUF_METADATA_UINT64_PTR(mbuf20, f->key_offset);\ + key21 = RTE_MBUF_METADATA_UINT64_PTR(mbuf21, f->key_offset);\ + hash_offset_buffer20 = *key20 & f->key_mask; \ + hash_offset_buffer21 = *key21 & f->key_mask; \ + \ + lookup_key8_cmp((&hash_offset_buffer20), bucket20, pos20);\ + lookup_key8_cmp((&hash_offset_buffer21), bucket21, pos21);\ + \ + pkt20_mask = ((bucket20->signature >> pos20) & 1LLU) << pkt20_index;\ + pkt21_mask = ((bucket21->signature >> pos21) & 1LLU) << pkt21_index;\ + pkts_mask_out |= pkt20_mask | pkt21_mask; \ + \ + a20 = (void *) &bucket20->data[pos20 * f->entry_size]; \ + a21 = (void *) &bucket21->data[pos21 * f->entry_size]; \ + rte_prefetch0(a20); \ + rte_prefetch0(a21); \ + entries[pkt20_index] = a20; \ + entries[pkt21_index] = a21; \ + \ + bucket20_mask = (~pkt20_mask) & (bucket20->next_valid << pkt20_index);\ + bucket21_mask = (~pkt21_mask) & (bucket21->next_valid << pkt21_index);\ + buckets_mask |= bucket20_mask | bucket21_mask; \ + bucket20_next = bucket20->next; \ + bucket21_next = bucket21->next; \ + buckets[pkt20_index] = bucket20_next; \ + buckets[pkt21_index] = bucket21_next; \ + keys[pkt20_index] = key20; \ + keys[pkt21_index] = key21; \ +} + +static int +rte_table_hash_lookup_key8_lru( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index, + pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_8 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key8_lru() */ + +static int +rte_table_hash_lookup_key8_lru_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_8 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_lru(pkt_index, mbuf, bucket, + pkts_mask_out, entries, f); + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_lru(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, f); + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key8_lru_dosig() */ + +static int +rte_table_hash_lookup_key8_ext( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_8 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, buckets, + keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key8_ext() */ + +static int +rte_table_hash_lookup_key8_ext_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *f = (struct rte_table_hash *) table; + struct rte_bucket_4_8 *bucket10, *bucket11, *bucket20, *bucket21; + struct rte_mbuf *mbuf00, *mbuf01, *mbuf10, *mbuf11, *mbuf20, *mbuf21; + uint32_t pkt00_index, pkt01_index, pkt10_index; + uint32_t pkt11_index, pkt20_index, pkt21_index; + uint64_t pkts_mask_out = 0, buckets_mask = 0; + struct rte_bucket_4_8 *buckets[RTE_PORT_IN_BURST_SIZE_MAX]; + uint64_t *keys[RTE_PORT_IN_BURST_SIZE_MAX]; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_KEY8_STATS_PKTS_IN_ADD(f, n_pkts_in); + + /* Cannot run the pipeline with less than 5 packets */ + if (__builtin_popcountll(pkts_mask) < 5) { + for ( ; pkts_mask; ) { + struct rte_bucket_4_8 *bucket; + struct rte_mbuf *mbuf; + uint32_t pkt_index; + + lookup1_stage0(pkt_index, mbuf, pkts, pkts_mask, f); + lookup1_stage1_dosig(mbuf, bucket, f); + lookup1_stage2_ext(pkt_index, mbuf, bucket, + pkts_mask_out, entries, buckets_mask, + buckets, keys, f); + } + + goto grind_next_buckets; + } + + /* + * Pipeline fill + * + */ + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline feed */ + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(pkt00_index, pkt01_index, mbuf00, mbuf01, pkts, + pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(pkt00_index, pkt01_index, + mbuf00, mbuf01, pkts, pkts_mask, f); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + } + + /* + * Pipeline flush + * + */ + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + mbuf10 = mbuf00; + mbuf11 = mbuf01; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(mbuf10, mbuf11, bucket10, bucket11, f); + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + + /* Pipeline feed */ + bucket20 = bucket10; + bucket21 = bucket11; + mbuf20 = mbuf10; + mbuf21 = mbuf11; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2_ext(pkt20_index, pkt21_index, mbuf20, mbuf21, + bucket20, bucket21, pkts_mask_out, entries, + buckets_mask, buckets, keys, f); + +grind_next_buckets: + /* Grind next buckets */ + for ( ; buckets_mask; ) { + uint64_t buckets_mask_next = 0; + + for ( ; buckets_mask; ) { + uint64_t pkt_mask; + uint32_t pkt_index; + + pkt_index = __builtin_ctzll(buckets_mask); + pkt_mask = 1LLU << pkt_index; + buckets_mask &= ~pkt_mask; + + lookup_grinder(pkt_index, buckets, keys, pkts_mask_out, + entries, buckets_mask_next, f); + } + + buckets_mask = buckets_mask_next; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_KEY8_STATS_PKTS_LOOKUP_MISS(f, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} /* rte_table_hash_lookup_key8_dosig_ext() */ + +static int +rte_table_hash_key8_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_key8_lru_ops = { + .f_create = rte_table_hash_create_key8_lru, + .f_free = rte_table_hash_free_key8_lru, + .f_add = rte_table_hash_entry_add_key8_lru, + .f_delete = rte_table_hash_entry_delete_key8_lru, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key8_lru, + .f_stats = rte_table_hash_key8_stats_read, +}; + +struct rte_table_ops rte_table_hash_key8_lru_dosig_ops = { + .f_create = rte_table_hash_create_key8_lru, + .f_free = rte_table_hash_free_key8_lru, + .f_add = rte_table_hash_entry_add_key8_lru, + .f_delete = rte_table_hash_entry_delete_key8_lru, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key8_lru_dosig, + .f_stats = rte_table_hash_key8_stats_read, +}; + +struct rte_table_ops rte_table_hash_key8_ext_ops = { + .f_create = rte_table_hash_create_key8_ext, + .f_free = rte_table_hash_free_key8_ext, + .f_add = rte_table_hash_entry_add_key8_ext, + .f_delete = rte_table_hash_entry_delete_key8_ext, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key8_ext, + .f_stats = rte_table_hash_key8_stats_read, +}; + +struct rte_table_ops rte_table_hash_key8_ext_dosig_ops = { + .f_create = rte_table_hash_create_key8_ext, + .f_free = rte_table_hash_free_key8_ext, + .f_add = rte_table_hash_entry_add_key8_ext, + .f_delete = rte_table_hash_entry_delete_key8_ext, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lookup_key8_ext_dosig, + .f_stats = rte_table_hash_key8_stats_read, +}; diff --git a/lib/librte_table/rte_table_hash_lru.c b/lib/librte_table/rte_table_hash_lru.c new file mode 100644 index 00000000..407c62ab --- /dev/null +++ b/lib/librte_table/rte_table_hash_lru.c @@ -0,0 +1,1102 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "rte_table_hash.h" +#include "rte_lru.h" + +#define KEYS_PER_BUCKET 4 + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct bucket { + union { + struct bucket *next; + uint64_t lru_list; + }; + uint16_t sig[KEYS_PER_BUCKET]; + uint32_t key_pos[KEYS_PER_BUCKET]; +}; + +struct grinder { + struct bucket *bkt; + uint64_t sig; + uint64_t match; + uint64_t match_pos; + uint32_t key_index; +}; + +struct rte_table_hash { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t key_size; + uint32_t entry_size; + uint32_t n_keys; + uint32_t n_buckets; + rte_table_hash_op_hash f_hash; + uint64_t seed; + uint32_t signature_offset; + uint32_t key_offset; + + /* Internal */ + uint64_t bucket_mask; + uint32_t key_size_shl; + uint32_t data_size_shl; + uint32_t key_stack_tos; + + /* Grinder */ + struct grinder grinders[RTE_PORT_IN_BURST_SIZE_MAX]; + + /* Tables */ + struct bucket *buckets; + uint8_t *key_mem; + uint8_t *data_mem; + uint32_t *key_stack; + + /* Table memory */ + uint8_t memory[0] __rte_cache_aligned; +}; + +static int +check_params_create(struct rte_table_hash_lru_params *params) +{ + uint32_t n_buckets_min; + + /* key_size */ + if ((params->key_size == 0) || + (!rte_is_power_of_2(params->key_size))) { + RTE_LOG(ERR, TABLE, "%s: key_size invalid value\n", __func__); + return -EINVAL; + } + + /* n_keys */ + if ((params->n_keys == 0) || + (!rte_is_power_of_2(params->n_keys))) { + RTE_LOG(ERR, TABLE, "%s: n_keys invalid value\n", __func__); + return -EINVAL; + } + + /* n_buckets */ + n_buckets_min = (params->n_keys + KEYS_PER_BUCKET - 1) / params->n_keys; + if ((params->n_buckets == 0) || + (!rte_is_power_of_2(params->n_keys)) || + (params->n_buckets < n_buckets_min)) { + RTE_LOG(ERR, TABLE, "%s: n_buckets invalid value\n", __func__); + return -EINVAL; + } + + /* f_hash */ + if (params->f_hash == NULL) { + RTE_LOG(ERR, TABLE, "%s: f_hash invalid value\n", __func__); + return -EINVAL; + } + + return 0; +} + +static void * +rte_table_hash_lru_create(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_hash_lru_params *p = + (struct rte_table_hash_lru_params *) params; + struct rte_table_hash *t; + uint32_t total_size, table_meta_sz; + uint32_t bucket_sz, key_sz, key_stack_sz, data_sz; + uint32_t bucket_offset, key_offset, key_stack_offset, data_offset; + uint32_t i; + + /* Check input parameters */ + if ((check_params_create(p) != 0) || + (!rte_is_power_of_2(entry_size)) || + ((sizeof(struct rte_table_hash) % RTE_CACHE_LINE_SIZE) != 0) || + (sizeof(struct bucket) != (RTE_CACHE_LINE_SIZE / 2))) { + return NULL; + } + + /* Memory allocation */ + table_meta_sz = RTE_CACHE_LINE_ROUNDUP(sizeof(struct rte_table_hash)); + bucket_sz = RTE_CACHE_LINE_ROUNDUP(p->n_buckets * sizeof(struct bucket)); + key_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * p->key_size); + key_stack_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * sizeof(uint32_t)); + data_sz = RTE_CACHE_LINE_ROUNDUP(p->n_keys * entry_size); + total_size = table_meta_sz + bucket_sz + key_sz + key_stack_sz + + data_sz; + + t = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, socket_id); + if (t == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for hash table\n", + __func__, total_size); + return NULL; + } + RTE_LOG(INFO, TABLE, "%s (%u-byte key): Hash table memory footprint is " + "%u bytes\n", __func__, p->key_size, total_size); + + /* Memory initialization */ + t->key_size = p->key_size; + t->entry_size = entry_size; + t->n_keys = p->n_keys; + t->n_buckets = p->n_buckets; + t->f_hash = p->f_hash; + t->seed = p->seed; + t->signature_offset = p->signature_offset; + t->key_offset = p->key_offset; + + /* Internal */ + t->bucket_mask = t->n_buckets - 1; + t->key_size_shl = __builtin_ctzl(p->key_size); + t->data_size_shl = __builtin_ctzl(entry_size); + + /* Tables */ + bucket_offset = 0; + key_offset = bucket_offset + bucket_sz; + key_stack_offset = key_offset + key_sz; + data_offset = key_stack_offset + key_stack_sz; + + t->buckets = (struct bucket *) &t->memory[bucket_offset]; + t->key_mem = &t->memory[key_offset]; + t->key_stack = (uint32_t *) &t->memory[key_stack_offset]; + t->data_mem = &t->memory[data_offset]; + + /* Key stack */ + for (i = 0; i < t->n_keys; i++) + t->key_stack[i] = t->n_keys - 1 - i; + t->key_stack_tos = t->n_keys; + + /* LRU */ + for (i = 0; i < t->n_buckets; i++) { + struct bucket *bkt = &t->buckets[i]; + + lru_init(bkt); + } + + return t; +} + +static int +rte_table_hash_lru_free(void *table) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + /* Check input parameters */ + if (t == NULL) + return -EINVAL; + + rte_free(t); + return 0; +} + +static int +rte_table_hash_lru_entry_add(void *table, void *key, void *entry, + int *key_found, void **entry_ptr) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct bucket *bkt; + uint64_t sig; + uint32_t bkt_index, i; + + sig = t->f_hash(key, t->key_size, t->seed); + bkt_index = sig & t->bucket_mask; + bkt = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + + if ((sig == bkt_sig) && (memcmp(key, bkt_key, t->key_size) + == 0)) { + uint8_t *data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + memcpy(data, entry, t->entry_size); + lru_update(bkt, i); + *key_found = 1; + *entry_ptr = (void *) data; + return 0; + } + } + + /* Key is not present in the bucket */ + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + + if (bkt_sig == 0) { + uint32_t bkt_key_index; + uint8_t *bkt_key, *data; + + /* Allocate new key */ + if (t->key_stack_tos == 0) { + /* No keys available */ + return -ENOSPC; + } + bkt_key_index = t->key_stack[--t->key_stack_tos]; + + /* Install new key */ + bkt_key = &t->key_mem[bkt_key_index << t->key_size_shl]; + data = &t->data_mem[bkt_key_index << t->data_size_shl]; + + bkt->sig[i] = (uint16_t) sig; + bkt->key_pos[i] = bkt_key_index; + memcpy(bkt_key, key, t->key_size); + memcpy(data, entry, t->entry_size); + lru_update(bkt, i); + + *key_found = 0; + *entry_ptr = (void *) data; + return 0; + } + } + + /* Bucket full */ + { + uint64_t pos = lru_pos(bkt); + uint32_t bkt_key_index = bkt->key_pos[pos]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + uint8_t *data = &t->data_mem[bkt_key_index << t->data_size_shl]; + + bkt->sig[pos] = (uint16_t) sig; + memcpy(bkt_key, key, t->key_size); + memcpy(data, entry, t->entry_size); + lru_update(bkt, pos); + + *key_found = 0; + *entry_ptr = (void *) data; + return 0; + } +} + +static int +rte_table_hash_lru_entry_delete(void *table, void *key, int *key_found, + void *entry) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct bucket *bkt; + uint64_t sig; + uint32_t bkt_index, i; + + sig = t->f_hash(key, t->key_size, t->seed); + bkt_index = sig & t->bucket_mask; + bkt = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + + if ((sig == bkt_sig) && + (memcmp(key, bkt_key, t->key_size) == 0)) { + uint8_t *data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + bkt->sig[i] = 0; + t->key_stack[t->key_stack_tos++] = bkt_key_index; + *key_found = 1; + memcpy(entry, data, t->entry_size); + return 0; + } + } + + /* Key is not present in the bucket */ + *key_found = 0; + return 0; +} + +static int rte_table_hash_lru_lookup_unoptimized( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries, + int dosig) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + uint64_t pkts_mask_out = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in); + + for ( ; pkts_mask; ) { + struct bucket *bkt; + struct rte_mbuf *pkt; + uint8_t *key; + uint64_t pkt_mask, sig; + uint32_t pkt_index, bkt_index, i; + + pkt_index = __builtin_ctzll(pkts_mask); + pkt_mask = 1LLU << pkt_index; + pkts_mask &= ~pkt_mask; + + pkt = pkts[pkt_index]; + key = RTE_MBUF_METADATA_UINT8_PTR(pkt, t->key_offset); + if (dosig) + sig = (uint64_t) t->f_hash(key, t->key_size, t->seed); + else + sig = RTE_MBUF_METADATA_UINT32(pkt, + t->signature_offset); + + bkt_index = sig & t->bucket_mask; + bkt = &t->buckets[bkt_index]; + sig = (sig >> 16) | 1LLU; + + /* Key is present in the bucket */ + for (i = 0; i < KEYS_PER_BUCKET; i++) { + uint64_t bkt_sig = (uint64_t) bkt->sig[i]; + uint32_t bkt_key_index = bkt->key_pos[i]; + uint8_t *bkt_key = &t->key_mem[bkt_key_index << + t->key_size_shl]; + + if ((sig == bkt_sig) && (memcmp(key, bkt_key, + t->key_size) == 0)) { + uint8_t *data = &t->data_mem[bkt_key_index << + t->data_size_shl]; + + lru_update(bkt, i); + pkts_mask_out |= pkt_mask; + entries[pkt_index] = (void *) data; + break; + } + } + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return 0; +} + +/*** +* +* mask = match bitmask +* match = at least one match +* match_many = more than one match +* match_pos = position of first match +* +* ---------------------------------------- +* mask match match_many match_pos +* ---------------------------------------- +* 0000 0 0 00 +* 0001 1 0 00 +* 0010 1 0 01 +* 0011 1 1 00 +* ---------------------------------------- +* 0100 1 0 10 +* 0101 1 1 00 +* 0110 1 1 01 +* 0111 1 1 00 +* ---------------------------------------- +* 1000 1 0 11 +* 1001 1 1 00 +* 1010 1 1 01 +* 1011 1 1 00 +* ---------------------------------------- +* 1100 1 1 10 +* 1101 1 1 00 +* 1110 1 1 01 +* 1111 1 1 00 +* ---------------------------------------- +* +* match = 1111_1111_1111_1110 +* match_many = 1111_1110_1110_1000 +* match_pos = 0001_0010_0001_0011__0001_0010_0001_0000 +* +* match = 0xFFFELLU +* match_many = 0xFEE8LLU +* match_pos = 0x12131210LLU +* +***/ + +#define LUT_MATCH 0xFFFELLU +#define LUT_MATCH_MANY 0xFEE8LLU +#define LUT_MATCH_POS 0x12131210LLU + +#define lookup_cmp_sig(mbuf_sig, bucket, match, match_many, match_pos)\ +{ \ + uint64_t bucket_sig[4], mask[4], mask_all; \ + \ + bucket_sig[0] = bucket->sig[0]; \ + bucket_sig[1] = bucket->sig[1]; \ + bucket_sig[2] = bucket->sig[2]; \ + bucket_sig[3] = bucket->sig[3]; \ + \ + bucket_sig[0] ^= mbuf_sig; \ + bucket_sig[1] ^= mbuf_sig; \ + bucket_sig[2] ^= mbuf_sig; \ + bucket_sig[3] ^= mbuf_sig; \ + \ + mask[0] = 0; \ + mask[1] = 0; \ + mask[2] = 0; \ + mask[3] = 0; \ + \ + if (bucket_sig[0] == 0) \ + mask[0] = 1; \ + if (bucket_sig[1] == 0) \ + mask[1] = 2; \ + if (bucket_sig[2] == 0) \ + mask[2] = 4; \ + if (bucket_sig[3] == 0) \ + mask[3] = 8; \ + \ + mask_all = (mask[0] | mask[1]) | (mask[2] | mask[3]); \ + \ + match = (LUT_MATCH >> mask_all) & 1; \ + match_many = (LUT_MATCH_MANY >> mask_all) & 1; \ + match_pos = (LUT_MATCH_POS >> (mask_all << 1)) & 3; \ +} + +#define lookup_cmp_key(mbuf, key, match_key, f) \ +{ \ + uint64_t *pkt_key = RTE_MBUF_METADATA_UINT64_PTR(mbuf, f->key_offset);\ + uint64_t *bkt_key = (uint64_t *) key; \ + \ + switch (f->key_size) { \ + case 8: \ + { \ + uint64_t xor = pkt_key[0] ^ bkt_key[0]; \ + match_key = 0; \ + if (xor == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 16: \ + { \ + uint64_t xor[2], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + or = xor[0] | xor[1]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 32: \ + { \ + uint64_t xor[4], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + xor[2] = pkt_key[2] ^ bkt_key[2]; \ + xor[3] = pkt_key[3] ^ bkt_key[3]; \ + or = xor[0] | xor[1] | xor[2] | xor[3]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + case 64: \ + { \ + uint64_t xor[8], or; \ + \ + xor[0] = pkt_key[0] ^ bkt_key[0]; \ + xor[1] = pkt_key[1] ^ bkt_key[1]; \ + xor[2] = pkt_key[2] ^ bkt_key[2]; \ + xor[3] = pkt_key[3] ^ bkt_key[3]; \ + xor[4] = pkt_key[4] ^ bkt_key[4]; \ + xor[5] = pkt_key[5] ^ bkt_key[5]; \ + xor[6] = pkt_key[6] ^ bkt_key[6]; \ + xor[7] = pkt_key[7] ^ bkt_key[7]; \ + or = xor[0] | xor[1] | xor[2] | xor[3] | \ + xor[4] | xor[5] | xor[6] | xor[7]; \ + match_key = 0; \ + if (or == 0) \ + match_key = 1; \ + } \ + break; \ + \ + default: \ + match_key = 0; \ + if (memcmp(pkt_key, bkt_key, f->key_size) == 0) \ + match_key = 1; \ + } \ +} + +#define lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index)\ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + struct rte_mbuf *mbuf00, *mbuf01; \ + uint32_t key_offset = t->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + mbuf00 = pkts[pkt00_index]; \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + mbuf01 = pkts[pkt01_index]; \ + \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, pkt00_index, \ + pkt01_index) \ +{ \ + uint64_t pkt00_mask, pkt01_mask; \ + struct rte_mbuf *mbuf00, *mbuf01; \ + uint32_t key_offset = t->key_offset; \ + \ + pkt00_index = __builtin_ctzll(pkts_mask); \ + pkt00_mask = 1LLU << pkt00_index; \ + pkts_mask &= ~pkt00_mask; \ + mbuf00 = pkts[pkt00_index]; \ + \ + pkt01_index = __builtin_ctzll(pkts_mask); \ + if (pkts_mask == 0) \ + pkt01_index = pkt00_index; \ + \ + pkt01_mask = 1LLU << pkt01_index; \ + pkts_mask &= ~pkt01_mask; \ + mbuf01 = pkts[pkt01_index]; \ + \ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf00, key_offset));\ + rte_prefetch0(RTE_MBUF_METADATA_UINT8_PTR(mbuf01, key_offset));\ +} + +#define lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index) \ +{ \ + struct grinder *g10, *g11; \ + uint64_t sig10, sig11, bkt10_index, bkt11_index; \ + struct rte_mbuf *mbuf10, *mbuf11; \ + struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ + uint64_t bucket_mask = t->bucket_mask; \ + uint32_t signature_offset = t->signature_offset; \ + \ + mbuf10 = pkts[pkt10_index]; \ + sig10 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf10, signature_offset);\ + bkt10_index = sig10 & bucket_mask; \ + bkt10 = &buckets[bkt10_index]; \ + \ + mbuf11 = pkts[pkt11_index]; \ + sig11 = (uint64_t) RTE_MBUF_METADATA_UINT32(mbuf11, signature_offset);\ + bkt11_index = sig11 & bucket_mask; \ + bkt11 = &buckets[bkt11_index]; \ + \ + rte_prefetch0(bkt10); \ + rte_prefetch0(bkt11); \ + \ + g10 = &g[pkt10_index]; \ + g10->sig = sig10; \ + g10->bkt = bkt10; \ + \ + g11 = &g[pkt11_index]; \ + g11->sig = sig11; \ + g11->bkt = bkt11; \ +} + +#define lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index)\ +{ \ + struct grinder *g10, *g11; \ + uint64_t sig10, sig11, bkt10_index, bkt11_index; \ + struct rte_mbuf *mbuf10, *mbuf11; \ + struct bucket *bkt10, *bkt11, *buckets = t->buckets; \ + uint8_t *key10, *key11; \ + uint64_t bucket_mask = t->bucket_mask; \ + rte_table_hash_op_hash f_hash = t->f_hash; \ + uint64_t seed = t->seed; \ + uint32_t key_size = t->key_size; \ + uint32_t key_offset = t->key_offset; \ + \ + mbuf10 = pkts[pkt10_index]; \ + key10 = RTE_MBUF_METADATA_UINT8_PTR(mbuf10, key_offset);\ + sig10 = (uint64_t) f_hash(key10, key_size, seed); \ + bkt10_index = sig10 & bucket_mask; \ + bkt10 = &buckets[bkt10_index]; \ + \ + mbuf11 = pkts[pkt11_index]; \ + key11 = RTE_MBUF_METADATA_UINT8_PTR(mbuf11, key_offset);\ + sig11 = (uint64_t) f_hash(key11, key_size, seed); \ + bkt11_index = sig11 & bucket_mask; \ + bkt11 = &buckets[bkt11_index]; \ + \ + rte_prefetch0(bkt10); \ + rte_prefetch0(bkt11); \ + \ + g10 = &g[pkt10_index]; \ + g10->sig = sig10; \ + g10->bkt = bkt10; \ + \ + g11 = &g[pkt11_index]; \ + g11->sig = sig11; \ + g11->bkt = bkt11; \ +} + +#define lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many)\ +{ \ + struct grinder *g20, *g21; \ + uint64_t sig20, sig21; \ + struct bucket *bkt20, *bkt21; \ + uint8_t *key20, *key21, *key_mem = t->key_mem; \ + uint64_t match20, match21, match_many20, match_many21; \ + uint64_t match_pos20, match_pos21; \ + uint32_t key20_index, key21_index, key_size_shl = t->key_size_shl;\ + \ + g20 = &g[pkt20_index]; \ + sig20 = g20->sig; \ + bkt20 = g20->bkt; \ + sig20 = (sig20 >> 16) | 1LLU; \ + lookup_cmp_sig(sig20, bkt20, match20, match_many20, match_pos20);\ + match20 <<= pkt20_index; \ + match_many20 <<= pkt20_index; \ + key20_index = bkt20->key_pos[match_pos20]; \ + key20 = &key_mem[key20_index << key_size_shl]; \ + \ + g21 = &g[pkt21_index]; \ + sig21 = g21->sig; \ + bkt21 = g21->bkt; \ + sig21 = (sig21 >> 16) | 1LLU; \ + lookup_cmp_sig(sig21, bkt21, match21, match_many21, match_pos21);\ + match21 <<= pkt21_index; \ + match_many21 <<= pkt21_index; \ + key21_index = bkt21->key_pos[match_pos21]; \ + key21 = &key_mem[key21_index << key_size_shl]; \ + \ + rte_prefetch0(key20); \ + rte_prefetch0(key21); \ + \ + pkts_mask_match_many |= match_many20 | match_many21; \ + \ + g20->match = match20; \ + g20->match_pos = match_pos20; \ + g20->key_index = key20_index; \ + \ + g21->match = match21; \ + g21->match_pos = match_pos21; \ + g21->key_index = key21_index; \ +} + +#define lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, \ + entries) \ +{ \ + struct grinder *g30, *g31; \ + struct rte_mbuf *mbuf30, *mbuf31; \ + struct bucket *bkt30, *bkt31; \ + uint8_t *key30, *key31, *key_mem = t->key_mem; \ + uint8_t *data30, *data31, *data_mem = t->data_mem; \ + uint64_t match30, match31, match_pos30, match_pos31; \ + uint64_t match_key30, match_key31, match_keys; \ + uint32_t key30_index, key31_index; \ + uint32_t key_size_shl = t->key_size_shl; \ + uint32_t data_size_shl = t->data_size_shl; \ + \ + mbuf30 = pkts[pkt30_index]; \ + g30 = &g[pkt30_index]; \ + bkt30 = g30->bkt; \ + match30 = g30->match; \ + match_pos30 = g30->match_pos; \ + key30_index = g30->key_index; \ + key30 = &key_mem[key30_index << key_size_shl]; \ + lookup_cmp_key(mbuf30, key30, match_key30, t); \ + match_key30 <<= pkt30_index; \ + match_key30 &= match30; \ + data30 = &data_mem[key30_index << data_size_shl]; \ + entries[pkt30_index] = data30; \ + \ + mbuf31 = pkts[pkt31_index]; \ + g31 = &g[pkt31_index]; \ + bkt31 = g31->bkt; \ + match31 = g31->match; \ + match_pos31 = g31->match_pos; \ + key31_index = g31->key_index; \ + key31 = &key_mem[key31_index << key_size_shl]; \ + lookup_cmp_key(mbuf31, key31, match_key31, t); \ + match_key31 <<= pkt31_index; \ + match_key31 &= match31; \ + data31 = &data_mem[key31_index << data_size_shl]; \ + entries[pkt31_index] = data31; \ + \ + rte_prefetch0(data30); \ + rte_prefetch0(data31); \ + \ + match_keys = match_key30 | match_key31; \ + pkts_mask_out |= match_keys; \ + \ + if (match_key30 == 0) \ + match_pos30 = 4; \ + lru_update(bkt30, match_pos30); \ + \ + if (match_key31 == 0) \ + match_pos31 = 4; \ + lru_update(bkt31, match_pos31); \ +} + +/*** +* The lookup function implements a 4-stage pipeline, with each stage processing +* two different packets. The purpose of pipelined implementation is to hide the +* latency of prefetching the data structures and loosen the data dependency +* between instructions. +* +* p00 _______ p10 _______ p20 _______ p30 _______ +* ----->| |----->| |----->| |----->| |-----> +* | 0 | | 1 | | 2 | | 3 | +* ----->|_______|----->|_______|----->|_______|----->|_______|-----> +* p01 p11 p21 p31 +* +* The naming convention is: +* pXY = packet Y of stage X, X = 0 .. 3, Y = 0 .. 1 +* +***/ +static int rte_table_hash_lru_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct grinder *g = t->grinders; + uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; + uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; + uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; + int status = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in); + + /* Cannot run the pipeline with less than 7 packets */ + if (__builtin_popcountll(pkts_mask) < 7) + return rte_table_hash_lru_lookup_unoptimized(table, pkts, + pkts_mask, lookup_hit_mask, entries, 0); + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline feed */ + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline feed */ + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, + pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, + pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, + pkts_mask_out, entries); + } + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Slow path */ + pkts_mask_match_many &= ~pkts_mask_out; + if (pkts_mask_match_many) { + uint64_t pkts_mask_out_slow = 0; + + status = rte_table_hash_lru_lookup_unoptimized(table, pkts, + pkts_mask_match_many, &pkts_mask_out_slow, entries, 0); + pkts_mask_out |= pkts_mask_out_slow; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return status; +} + +static int rte_table_hash_lru_lookup_dosig( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + struct grinder *g = t->grinders; + uint64_t pkt00_index, pkt01_index, pkt10_index, pkt11_index; + uint64_t pkt20_index, pkt21_index, pkt30_index, pkt31_index; + uint64_t pkts_mask_out = 0, pkts_mask_match_many = 0; + int status = 0; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_HASH_LRU_STATS_PKTS_IN_ADD(t, n_pkts_in); + + /* Cannot run the pipeline with less than 7 packets */ + if (__builtin_popcountll(pkts_mask) < 7) + return rte_table_hash_lru_lookup_unoptimized(table, pkts, + pkts_mask, lookup_hit_mask, entries, 1); + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline feed */ + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline feed */ + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0(t, g, pkts, pkts_mask, pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* + * Pipeline run + * + */ + for ( ; pkts_mask; ) { + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 0 */ + lookup2_stage0_with_odd_support(t, g, pkts, pkts_mask, + pkt00_index, pkt01_index); + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, + pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, + pkts_mask_out, entries); + } + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + pkt10_index = pkt00_index; + pkt11_index = pkt01_index; + + /* Pipeline stage 1 */ + lookup2_stage1_dosig(t, g, pkts, pkt10_index, pkt11_index); + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + pkt20_index = pkt10_index; + pkt21_index = pkt11_index; + + /* Pipeline stage 2 */ + lookup2_stage2(t, g, pkt20_index, pkt21_index, pkts_mask_match_many); + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Pipeline feed */ + pkt30_index = pkt20_index; + pkt31_index = pkt21_index; + + /* Pipeline stage 3 */ + lookup2_stage3(t, g, pkts, pkt30_index, pkt31_index, pkts_mask_out, + entries); + + /* Slow path */ + pkts_mask_match_many &= ~pkts_mask_out; + if (pkts_mask_match_many) { + uint64_t pkts_mask_out_slow = 0; + + status = rte_table_hash_lru_lookup_unoptimized(table, pkts, + pkts_mask_match_many, &pkts_mask_out_slow, entries, 1); + pkts_mask_out |= pkts_mask_out_slow; + } + + *lookup_hit_mask = pkts_mask_out; + RTE_TABLE_HASH_LRU_STATS_PKTS_LOOKUP_MISS(t, n_pkts_in - __builtin_popcountll(pkts_mask_out)); + return status; +} + +static int +rte_table_hash_lru_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_hash *t = (struct rte_table_hash *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_hash_lru_ops = { + .f_create = rte_table_hash_lru_create, + .f_free = rte_table_hash_lru_free, + .f_add = rte_table_hash_lru_entry_add, + .f_delete = rte_table_hash_lru_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lru_lookup, + .f_stats = rte_table_hash_lru_stats_read, +}; + +struct rte_table_ops rte_table_hash_lru_dosig_ops = { + .f_create = rte_table_hash_lru_create, + .f_free = rte_table_hash_lru_free, + .f_add = rte_table_hash_lru_entry_add, + .f_delete = rte_table_hash_lru_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_hash_lru_lookup_dosig, + .f_stats = rte_table_hash_lru_stats_read, +}; diff --git a/lib/librte_table/rte_table_lpm.c b/lib/librte_table/rte_table_lpm.c new file mode 100644 index 00000000..cdeb0f5a --- /dev/null +++ b/lib/librte_table/rte_table_lpm.c @@ -0,0 +1,393 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "rte_table_lpm.h" + +#define RTE_TABLE_LPM_MAX_NEXT_HOPS 256 + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_table_lpm { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t entry_size; + uint32_t entry_unique_size; + uint32_t n_rules; + uint32_t offset; + + /* Handle to low-level LPM table */ + struct rte_lpm *lpm; + + /* Next Hop Table (NHT) */ + uint32_t nht_users[RTE_TABLE_LPM_MAX_NEXT_HOPS]; + uint32_t nht[0] __rte_cache_aligned; +}; + +static void * +rte_table_lpm_create(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_lpm_params *p = (struct rte_table_lpm_params *) params; + struct rte_table_lpm *lpm; + struct rte_lpm_config lpm_config; + + uint32_t total_size, nht_size; + + /* Check input parameters */ + if (p == NULL) { + RTE_LOG(ERR, TABLE, "%s: NULL input parameters\n", __func__); + return NULL; + } + if (p->n_rules == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__); + return NULL; + } + if (p->number_tbl8s == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid number_tbl8s\n", __func__); + return NULL; + } + if (p->entry_unique_size == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n", + __func__); + return NULL; + } + if (p->entry_unique_size > entry_size) { + RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n", + __func__); + return NULL; + } + if (p->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: Table name is NULL\n", + __func__); + return NULL; + } + entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t)); + + /* Memory allocation */ + nht_size = RTE_TABLE_LPM_MAX_NEXT_HOPS * entry_size; + total_size = sizeof(struct rte_table_lpm) + nht_size; + lpm = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, + socket_id); + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for LPM table\n", + __func__, total_size); + return NULL; + } + + /* LPM low-level table creation */ + lpm_config.max_rules = p->n_rules; + lpm_config.number_tbl8s = p->number_tbl8s; + lpm_config.flags = p->flags; + lpm->lpm = rte_lpm_create(p->name, socket_id, &lpm_config); + + if (lpm->lpm == NULL) { + rte_free(lpm); + RTE_LOG(ERR, TABLE, "Unable to create low-level LPM table\n"); + return NULL; + } + + /* Memory initialization */ + lpm->entry_size = entry_size; + lpm->entry_unique_size = p->entry_unique_size; + lpm->n_rules = p->n_rules; + lpm->offset = p->offset; + + return lpm; +} + +static int +rte_table_lpm_free(void *table) +{ + struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free previously allocated resources */ + rte_lpm_free(lpm->lpm); + rte_free(lpm); + + return 0; +} + +static int +nht_find_free(struct rte_table_lpm *lpm, uint32_t *pos) +{ + uint32_t i; + + for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) { + if (lpm->nht_users[i] == 0) { + *pos = i; + return 1; + } + } + + return 0; +} + +static int +nht_find_existing(struct rte_table_lpm *lpm, void *entry, uint32_t *pos) +{ + uint32_t i; + + for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) { + uint32_t *nht_entry = &lpm->nht[i * lpm->entry_size]; + + if ((lpm->nht_users[i] > 0) && (memcmp(nht_entry, entry, + lpm->entry_unique_size) == 0)) { + *pos = i; + return 1; + } + } + + return 0; +} + +static int +rte_table_lpm_entry_add( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; + struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key; + uint32_t nht_pos, nht_pos0_valid; + int status; + uint32_t nht_pos0 = 0; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (ip_prefix == NULL) { + RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n", + __func__); + return -EINVAL; + } + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + + if ((ip_prefix->depth == 0) || (ip_prefix->depth > 32)) { + RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", + __func__, ip_prefix->depth); + return -EINVAL; + } + + /* Check if rule is already present in the table */ + status = rte_lpm_is_rule_present(lpm->lpm, ip_prefix->ip, + ip_prefix->depth, &nht_pos0); + nht_pos0_valid = status > 0; + + /* Find existing or free NHT entry */ + if (nht_find_existing(lpm, entry, &nht_pos) == 0) { + uint32_t *nht_entry; + + if (nht_find_free(lpm, &nht_pos) == 0) { + RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__); + return -1; + } + + nht_entry = &lpm->nht[nht_pos * lpm->entry_size]; + memcpy(nht_entry, entry, lpm->entry_size); + } + + /* Add rule to low level LPM table */ + if (rte_lpm_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth, nht_pos) < 0) { + RTE_LOG(ERR, TABLE, "%s: LPM rule add failed\n", __func__); + return -1; + } + + /* Commit NHT changes */ + lpm->nht_users[nht_pos]++; + lpm->nht_users[nht_pos0] -= nht_pos0_valid; + + *key_found = nht_pos0_valid; + *entry_ptr = (void *) &lpm->nht[nht_pos * lpm->entry_size]; + return 0; +} + +static int +rte_table_lpm_entry_delete( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; + struct rte_table_lpm_key *ip_prefix = (struct rte_table_lpm_key *) key; + uint32_t nht_pos; + int status; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (ip_prefix == NULL) { + RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n", + __func__); + return -EINVAL; + } + if ((ip_prefix->depth == 0) || (ip_prefix->depth > 32)) { + RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__, + ip_prefix->depth); + return -EINVAL; + } + + /* Return if rule is not present in the table */ + status = rte_lpm_is_rule_present(lpm->lpm, ip_prefix->ip, + ip_prefix->depth, &nht_pos); + if (status < 0) { + RTE_LOG(ERR, TABLE, "%s: LPM algorithmic error\n", __func__); + return -1; + } + if (status == 0) { + *key_found = 0; + return 0; + } + + /* Delete rule from the low-level LPM table */ + status = rte_lpm_delete(lpm->lpm, ip_prefix->ip, ip_prefix->depth); + if (status) { + RTE_LOG(ERR, TABLE, "%s: LPM rule delete failed\n", __func__); + return -1; + } + + /* Commit NHT changes */ + lpm->nht_users[nht_pos]--; + + *key_found = 1; + if (entry) + memcpy(entry, &lpm->nht[nht_pos * lpm->entry_size], + lpm->entry_size); + + return 0; +} + +static int +rte_table_lpm_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_lpm *lpm = (struct rte_table_lpm *) table; + uint64_t pkts_out_mask = 0; + uint32_t i; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_LPM_STATS_PKTS_IN_ADD(lpm, n_pkts_in); + + pkts_out_mask = 0; + for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX - + __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + struct rte_mbuf *pkt = pkts[i]; + uint32_t ip = rte_bswap32( + RTE_MBUF_METADATA_UINT32(pkt, lpm->offset)); + int status; + uint32_t nht_pos; + + status = rte_lpm_lookup(lpm->lpm, ip, &nht_pos); + if (status == 0) { + pkts_out_mask |= pkt_mask; + entries[i] = (void *) &lpm->nht[nht_pos * + lpm->entry_size]; + } + } + } + + *lookup_hit_mask = pkts_out_mask; + RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask)); + return 0; +} + +static int +rte_table_lpm_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_lpm *t = (struct rte_table_lpm *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_lpm_ops = { + .f_create = rte_table_lpm_create, + .f_free = rte_table_lpm_free, + .f_add = rte_table_lpm_entry_add, + .f_delete = rte_table_lpm_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_lpm_lookup, + .f_stats = rte_table_lpm_stats_read, +}; diff --git a/lib/librte_table/rte_table_lpm.h b/lib/librte_table/rte_table_lpm.h new file mode 100644 index 00000000..f3033234 --- /dev/null +++ b/lib/librte_table/rte_table_lpm.h @@ -0,0 +1,124 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_LPM_H__ +#define __INCLUDE_RTE_TABLE_LPM_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table LPM for IPv4 + * + * This table uses the Longest Prefix Match (LPM) algorithm to uniquely + * associate data to lookup keys. + * + * Use-case: IP routing table. Routes that are added to the table associate a + * next hop to an IP prefix. The IP prefix is specified as IP address and depth + * and cover for a multitude of lookup keys (i.e. destination IP addresses) + * that all share the same data (i.e. next hop). The next hop information + * typically contains the output interface ID, the IP address of the next hop + * station (which is part of the same IP network the output interface is + * connected to) and other flags and counters. + * + * The LPM primitive only allows associating an 8-bit number (next hop ID) to + * an IP prefix, while a routing table can potentially contain thousands of + * routes or even more. This means that the same next hop ID (and next hop + * information) has to be shared by multiple routes, which makes sense, as + * multiple remote networks could be reached through the same next hop. + * Therefore, when a route is added or updated, the LPM table has to check + * whether the same next hop is already in use before using a new next hop ID + * for this route. + * + * The comparison between different next hops is done for the first + * “entry_unique_size” bytes of the next hop information (configurable + * parameter), which have to uniquely identify the next hop, therefore the user + * has to carefully manage the format of the LPM table entry (i.e. the next + * hop information) so that any next hop data that changes value during + * run-time (e.g. counters) is placed outside of this area. + * + ***/ + +#include + +#include "rte_table.h" + +/** LPM table parameters */ +struct rte_table_lpm_params { + /** Table name */ + const char *name; + + /** Maximum number of LPM rules (i.e. IP routes) */ + uint32_t n_rules; + + /**< Number of tbl8s to allocate. */ + uint32_t number_tbl8s; + + /**< This field is currently unused. */ + int flags; + + /** Number of bytes at the start of the table entry that uniquely + identify the entry. Cannot be bigger than table entry size. */ + uint32_t entry_unique_size; + + /** Byte offset within input packet meta-data where lookup key (i.e. + the destination IP address) is located. */ + uint32_t offset; +}; + +/** LPM table rule (i.e. route), specified as IP prefix. While the key used by +the lookup operation is the destination IP address (read from the input packet +meta-data), the entry add and entry delete operations work with LPM rules, with +each rule covering for a multitude of lookup keys (destination IP addresses) +that share the same data (next hop). */ +struct rte_table_lpm_key { + /** IP address */ + uint32_t ip; + + /** IP address depth. The most significant "depth" bits of the IP + address specify the network part of the IP address, while the rest of + the bits specify the host part of the address and are ignored for the + purpose of route specification. */ + uint8_t depth; +}; + +/** LPM table operations */ +extern struct rte_table_ops rte_table_lpm_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_lpm_ipv6.c b/lib/librte_table/rte_table_lpm_ipv6.c new file mode 100644 index 00000000..836f4cf6 --- /dev/null +++ b/lib/librte_table/rte_table_lpm_ipv6.c @@ -0,0 +1,398 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "rte_table_lpm_ipv6.h" + +#define RTE_TABLE_LPM_MAX_NEXT_HOPS 256 + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_table_lpm_ipv6 { + struct rte_table_stats stats; + + /* Input parameters */ + uint32_t entry_size; + uint32_t entry_unique_size; + uint32_t n_rules; + uint32_t offset; + + /* Handle to low-level LPM table */ + struct rte_lpm6 *lpm; + + /* Next Hop Table (NHT) */ + uint32_t nht_users[RTE_TABLE_LPM_MAX_NEXT_HOPS]; + uint8_t nht[0] __rte_cache_aligned; +}; + +static void * +rte_table_lpm_ipv6_create(void *params, int socket_id, uint32_t entry_size) +{ + struct rte_table_lpm_ipv6_params *p = + (struct rte_table_lpm_ipv6_params *) params; + struct rte_table_lpm_ipv6 *lpm; + struct rte_lpm6_config lpm6_config; + uint32_t total_size, nht_size; + + /* Check input parameters */ + if (p == NULL) { + RTE_LOG(ERR, TABLE, "%s: NULL input parameters\n", __func__); + return NULL; + } + if (p->n_rules == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__); + return NULL; + } + if (p->number_tbl8s == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid n_rules\n", __func__); + return NULL; + } + if (p->entry_unique_size == 0) { + RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n", + __func__); + return NULL; + } + if (p->entry_unique_size > entry_size) { + RTE_LOG(ERR, TABLE, "%s: Invalid entry_unique_size\n", + __func__); + return NULL; + } + if (p->name == NULL) { + RTE_LOG(ERR, TABLE, "%s: Table name is NULL\n", + __func__); + return NULL; + } + entry_size = RTE_ALIGN(entry_size, sizeof(uint64_t)); + + /* Memory allocation */ + nht_size = RTE_TABLE_LPM_MAX_NEXT_HOPS * entry_size; + total_size = sizeof(struct rte_table_lpm_ipv6) + nht_size; + lpm = rte_zmalloc_socket("TABLE", total_size, RTE_CACHE_LINE_SIZE, + socket_id); + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for LPM IPv6 table\n", + __func__, total_size); + return NULL; + } + + /* LPM low-level table creation */ + lpm6_config.max_rules = p->n_rules; + lpm6_config.number_tbl8s = p->number_tbl8s; + lpm6_config.flags = 0; + lpm->lpm = rte_lpm6_create(p->name, socket_id, &lpm6_config); + if (lpm->lpm == NULL) { + rte_free(lpm); + RTE_LOG(ERR, TABLE, + "Unable to create low-level LPM IPv6 table\n"); + return NULL; + } + + /* Memory initialization */ + lpm->entry_size = entry_size; + lpm->entry_unique_size = p->entry_unique_size; + lpm->n_rules = p->n_rules; + lpm->offset = p->offset; + + return lpm; +} + +static int +rte_table_lpm_ipv6_free(void *table) +{ + struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + + /* Free previously allocated resources */ + rte_lpm6_free(lpm->lpm); + rte_free(lpm); + + return 0; +} + +static int +nht_find_free(struct rte_table_lpm_ipv6 *lpm, uint32_t *pos) +{ + uint32_t i; + + for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) { + if (lpm->nht_users[i] == 0) { + *pos = i; + return 1; + } + } + + return 0; +} + +static int +nht_find_existing(struct rte_table_lpm_ipv6 *lpm, void *entry, uint32_t *pos) +{ + uint32_t i; + + for (i = 0; i < RTE_TABLE_LPM_MAX_NEXT_HOPS; i++) { + uint8_t *nht_entry = &lpm->nht[i * lpm->entry_size]; + + if ((lpm->nht_users[i] > 0) && (memcmp(nht_entry, entry, + lpm->entry_unique_size) == 0)) { + *pos = i; + return 1; + } + } + + return 0; +} + +static int +rte_table_lpm_ipv6_entry_add( + void *table, + void *key, + void *entry, + int *key_found, + void **entry_ptr) +{ + struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6_key *ip_prefix = + (struct rte_table_lpm_ipv6_key *) key; + uint32_t nht_pos, nht_pos0_valid; + int status; + uint8_t nht_pos0; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (ip_prefix == NULL) { + RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n", + __func__); + return -EINVAL; + } + if (entry == NULL) { + RTE_LOG(ERR, TABLE, "%s: entry parameter is NULL\n", __func__); + return -EINVAL; + } + + if ((ip_prefix->depth == 0) || (ip_prefix->depth > 128)) { + RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__, + ip_prefix->depth); + return -EINVAL; + } + + /* Check if rule is already present in the table */ + status = rte_lpm6_is_rule_present(lpm->lpm, ip_prefix->ip, + ip_prefix->depth, &nht_pos0); + nht_pos0_valid = status > 0; + + /* Find existing or free NHT entry */ + if (nht_find_existing(lpm, entry, &nht_pos) == 0) { + uint8_t *nht_entry; + + if (nht_find_free(lpm, &nht_pos) == 0) { + RTE_LOG(ERR, TABLE, "%s: NHT full\n", __func__); + return -1; + } + + nht_entry = &lpm->nht[nht_pos * lpm->entry_size]; + memcpy(nht_entry, entry, lpm->entry_size); + } + + /* Add rule to low level LPM table */ + if (rte_lpm6_add(lpm->lpm, ip_prefix->ip, ip_prefix->depth, + (uint8_t) nht_pos) < 0) { + RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule add failed\n", __func__); + return -1; + } + + /* Commit NHT changes */ + lpm->nht_users[nht_pos]++; + lpm->nht_users[nht_pos0] -= nht_pos0_valid; + + *key_found = nht_pos0_valid; + *entry_ptr = (void *) &lpm->nht[nht_pos * lpm->entry_size]; + return 0; +} + +static int +rte_table_lpm_ipv6_entry_delete( + void *table, + void *key, + int *key_found, + void *entry) +{ + struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + struct rte_table_lpm_ipv6_key *ip_prefix = + (struct rte_table_lpm_ipv6_key *) key; + uint8_t nht_pos; + int status; + + /* Check input parameters */ + if (lpm == NULL) { + RTE_LOG(ERR, TABLE, "%s: table parameter is NULL\n", __func__); + return -EINVAL; + } + if (ip_prefix == NULL) { + RTE_LOG(ERR, TABLE, "%s: ip_prefix parameter is NULL\n", + __func__); + return -EINVAL; + } + if ((ip_prefix->depth == 0) || (ip_prefix->depth > 128)) { + RTE_LOG(ERR, TABLE, "%s: invalid depth (%d)\n", __func__, + ip_prefix->depth); + return -EINVAL; + } + + /* Return if rule is not present in the table */ + status = rte_lpm6_is_rule_present(lpm->lpm, ip_prefix->ip, + ip_prefix->depth, &nht_pos); + if (status < 0) { + RTE_LOG(ERR, TABLE, "%s: LPM IPv6 algorithmic error\n", + __func__); + return -1; + } + if (status == 0) { + *key_found = 0; + return 0; + } + + /* Delete rule from the low-level LPM table */ + status = rte_lpm6_delete(lpm->lpm, ip_prefix->ip, ip_prefix->depth); + if (status) { + RTE_LOG(ERR, TABLE, "%s: LPM IPv6 rule delete failed\n", + __func__); + return -1; + } + + /* Commit NHT changes */ + lpm->nht_users[nht_pos]--; + + *key_found = 1; + if (entry) + memcpy(entry, &lpm->nht[nht_pos * lpm->entry_size], + lpm->entry_size); + + return 0; +} + +static int +rte_table_lpm_ipv6_lookup( + void *table, + struct rte_mbuf **pkts, + uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + void **entries) +{ + struct rte_table_lpm_ipv6 *lpm = (struct rte_table_lpm_ipv6 *) table; + uint64_t pkts_out_mask = 0; + uint32_t i; + + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + RTE_TABLE_LPM_IPV6_STATS_PKTS_IN_ADD(lpm, n_pkts_in); + + pkts_out_mask = 0; + for (i = 0; i < (uint32_t)(RTE_PORT_IN_BURST_SIZE_MAX - + __builtin_clzll(pkts_mask)); i++) { + uint64_t pkt_mask = 1LLU << i; + + if (pkt_mask & pkts_mask) { + struct rte_mbuf *pkt = pkts[i]; + uint8_t *ip = RTE_MBUF_METADATA_UINT8_PTR(pkt, + lpm->offset); + int status; + uint8_t nht_pos; + + status = rte_lpm6_lookup(lpm->lpm, ip, &nht_pos); + if (status == 0) { + pkts_out_mask |= pkt_mask; + entries[i] = (void *) &lpm->nht[nht_pos * + lpm->entry_size]; + } + } + } + + *lookup_hit_mask = pkts_out_mask; + RTE_TABLE_LPM_IPV6_STATS_PKTS_LOOKUP_MISS(lpm, n_pkts_in - __builtin_popcountll(pkts_out_mask)); + return 0; +} + +static int +rte_table_lpm_ipv6_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_lpm_ipv6 *t = (struct rte_table_lpm_ipv6 *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_lpm_ipv6_ops = { + .f_create = rte_table_lpm_ipv6_create, + .f_free = rte_table_lpm_ipv6_free, + .f_add = rte_table_lpm_ipv6_entry_add, + .f_delete = rte_table_lpm_ipv6_entry_delete, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_lpm_ipv6_lookup, + .f_stats = rte_table_lpm_ipv6_stats_read, +}; diff --git a/lib/librte_table/rte_table_lpm_ipv6.h b/lib/librte_table/rte_table_lpm_ipv6.h new file mode 100644 index 00000000..43aea399 --- /dev/null +++ b/lib/librte_table/rte_table_lpm_ipv6.h @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_LPM_IPV6_H__ +#define __INCLUDE_RTE_TABLE_LPM_IPV6_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table LPM for IPv6 + * + * This table uses the Longest Prefix Match (LPM) algorithm to uniquely + * associate data to lookup keys. + * + * Use-case: IP routing table. Routes that are added to the table associate a + * next hop to an IP prefix. The IP prefix is specified as IP address and depth + * and cover for a multitude of lookup keys (i.e. destination IP addresses) + * that all share the same data (i.e. next hop). The next hop information + * typically contains the output interface ID, the IP address of the next hop + * station (which is part of the same IP network the output interface is + * connected to) and other flags and counters. + * + * The LPM primitive only allows associating an 8-bit number (next hop ID) to + * an IP prefix, while a routing table can potentially contain thousands of + * routes or even more. This means that the same next hop ID (and next hop + * information) has to be shared by multiple routes, which makes sense, as + * multiple remote networks could be reached through the same next hop. + * Therefore, when a route is added or updated, the LPM table has to check + * whether the same next hop is already in use before using a new next hop ID + * for this route. + * + * The comparison between different next hops is done for the first + * “entry_unique_size” bytes of the next hop information (configurable + * parameter), which have to uniquely identify the next hop, therefore the user + * has to carefully manage the format of the LPM table entry (i.e. the next + * hop information) so that any next hop data that changes value during + * run-time (e.g. counters) is placed outside of this area. + * + ***/ + +#include + +#include "rte_table.h" + +#define RTE_LPM_IPV6_ADDR_SIZE 16 + +/** LPM table parameters */ +struct rte_table_lpm_ipv6_params { + /** Table name */ + const char *name; + + /** Maximum number of LPM rules (i.e. IP routes) */ + uint32_t n_rules; + + uint32_t number_tbl8s; + + /** Number of bytes at the start of the table entry that uniquely + identify the entry. Cannot be bigger than table entry size. */ + uint32_t entry_unique_size; + + /** Byte offset within input packet meta-data where lookup key (i.e. + the destination IP address) is located. */ + uint32_t offset; +}; + +/** LPM table rule (i.e. route), specified as IP prefix. While the key used by +the lookup operation is the destination IP address (read from the input packet +meta-data), the entry add and entry delete operations work with LPM rules, with +each rule covering for a multitude of lookup keys (destination IP addresses) +that share the same data (next hop). */ +struct rte_table_lpm_ipv6_key { + /** IP address */ + uint8_t ip[RTE_LPM_IPV6_ADDR_SIZE]; + + /** IP address depth. The most significant "depth" bits of the IP + address specify the network part of the IP address, while the rest of + the bits specify the host part of the address and are ignored for the + purpose of route specification. */ + uint8_t depth; +}; + +/** LPM table operations */ +extern struct rte_table_ops rte_table_lpm_ipv6_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_stub.c b/lib/librte_table/rte_table_stub.c new file mode 100644 index 00000000..691d681a --- /dev/null +++ b/lib/librte_table/rte_table_stub.c @@ -0,0 +1,121 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include + +#include "rte_table_stub.h" + +#ifdef RTE_TABLE_STATS_COLLECT + +#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) \ + table->stats.n_pkts_in += val +#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) \ + table->stats.n_pkts_lookup_miss += val + +#else + +#define RTE_TABLE_LPM_STATS_PKTS_IN_ADD(table, val) +#define RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(table, val) + +#endif + +struct rte_table_stub { + struct rte_table_stats stats; +}; + +static void * +rte_table_stub_create(__rte_unused void *params, + __rte_unused int socket_id, + __rte_unused uint32_t entry_size) +{ + struct rte_table_stub *stub; + uint32_t size; + + size = sizeof(struct rte_table_stub); + stub = rte_zmalloc_socket("TABLE", size, RTE_CACHE_LINE_SIZE, + socket_id); + if (stub == NULL) { + RTE_LOG(ERR, TABLE, + "%s: Cannot allocate %u bytes for stub table\n", + __func__, size); + return NULL; + } + + return stub; +} + +static int +rte_table_stub_lookup( + __rte_unused void *table, + __rte_unused struct rte_mbuf **pkts, + __rte_unused uint64_t pkts_mask, + uint64_t *lookup_hit_mask, + __rte_unused void **entries) +{ + __rte_unused struct rte_table_stub *stub = (struct rte_table_stub *) table; + __rte_unused uint32_t n_pkts_in = __builtin_popcountll(pkts_mask); + + RTE_TABLE_LPM_STATS_PKTS_IN_ADD(stub, n_pkts_in); + *lookup_hit_mask = 0; + RTE_TABLE_LPM_STATS_PKTS_LOOKUP_MISS(stub, n_pkts_in); + + return 0; +} + +static int +rte_table_stub_stats_read(void *table, struct rte_table_stats *stats, int clear) +{ + struct rte_table_stub *t = (struct rte_table_stub *) table; + + if (stats != NULL) + memcpy(stats, &t->stats, sizeof(t->stats)); + + if (clear) + memset(&t->stats, 0, sizeof(t->stats)); + + return 0; +} + +struct rte_table_ops rte_table_stub_ops = { + .f_create = rte_table_stub_create, + .f_free = NULL, + .f_add = NULL, + .f_delete = NULL, + .f_add_bulk = NULL, + .f_delete_bulk = NULL, + .f_lookup = rte_table_stub_lookup, + .f_stats = rte_table_stub_stats_read, +}; diff --git a/lib/librte_table/rte_table_stub.h b/lib/librte_table/rte_table_stub.h new file mode 100644 index 00000000..e75340b0 --- /dev/null +++ b/lib/librte_table/rte_table_stub.h @@ -0,0 +1,62 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef __INCLUDE_RTE_TABLE_STUB_H__ +#define __INCLUDE_RTE_TABLE_STUB_H__ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * @file + * RTE Table Stub + * + * The stub table lookup operation produces lookup miss for all input packets. + * + ***/ + +#include + +#include "rte_table.h" + +/** Stub table parameters: NONE */ + +/** Stub table operations */ +extern struct rte_table_ops rte_table_stub_ops; + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_table/rte_table_version.map b/lib/librte_table/rte_table_version.map new file mode 100644 index 00000000..21386984 --- /dev/null +++ b/lib/librte_table/rte_table_version.map @@ -0,0 +1,28 @@ +DPDK_2.0 { + global: + + rte_table_acl_ops; + rte_table_array_ops; + rte_table_hash_ext_ops; + rte_table_hash_key8_ext_dosig_ops; + rte_table_hash_key8_ext_ops; + rte_table_hash_key8_lru_dosig_ops; + rte_table_hash_key8_lru_ops; + rte_table_hash_key16_ext_ops; + rte_table_hash_key16_lru_ops; + rte_table_hash_key32_ext_ops; + rte_table_hash_key32_lru_ops; + rte_table_hash_lru_ops; + rte_table_lpm_ipv6_ops; + rte_table_lpm_ops; + rte_table_stub_ops; + + local: *; +}; + +DPDK_2.2 { + global: + + rte_table_hash_key16_ext_dosig_ops; + +} DPDK_2.0; diff --git a/lib/librte_timer/Makefile b/lib/librte_timer/Makefile new file mode 100644 index 00000000..2aabef85 --- /dev/null +++ b/lib/librte_timer/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_timer.a + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 + +EXPORT_MAP := rte_timer_version.map + +LIBABIVER := 1 + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_TIMER) := rte_timer.c + +# install this header file +SYMLINK-$(CONFIG_RTE_LIBRTE_TIMER)-include := rte_timer.h + +# this lib needs eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_TIMER) += lib/librte_eal + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_timer/rte_timer.c b/lib/librte_timer/rte_timer.c new file mode 100644 index 00000000..3dcdab58 --- /dev/null +++ b/lib/librte_timer/rte_timer.c @@ -0,0 +1,637 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "rte_timer.h" + +LIST_HEAD(rte_timer_list, rte_timer); + +struct priv_timer { + struct rte_timer pending_head; /**< dummy timer instance to head up list */ + rte_spinlock_t list_lock; /**< lock to protect list access */ + + /** per-core variable that true if a timer was updated on this + * core since last reset of the variable */ + int updated; + + /** track the current depth of the skiplist */ + unsigned curr_skiplist_depth; + + unsigned prev_lcore; /**< used for lcore round robin */ + +#ifdef RTE_LIBRTE_TIMER_DEBUG + /** per-lcore statistics */ + struct rte_timer_debug_stats stats; +#endif +} __rte_cache_aligned; + +/** per-lcore private info for timers */ +static struct priv_timer priv_timer[RTE_MAX_LCORE]; + +/* when debug is enabled, store some statistics */ +#ifdef RTE_LIBRTE_TIMER_DEBUG +#define __TIMER_STAT_ADD(name, n) do { \ + unsigned __lcore_id = rte_lcore_id(); \ + if (__lcore_id < RTE_MAX_LCORE) \ + priv_timer[__lcore_id].stats.name += (n); \ + } while(0) +#else +#define __TIMER_STAT_ADD(name, n) do {} while(0) +#endif + +/* Init the timer library. */ +void +rte_timer_subsystem_init(void) +{ + unsigned lcore_id; + + /* since priv_timer is static, it's zeroed by default, so only init some + * fields. + */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id ++) { + rte_spinlock_init(&priv_timer[lcore_id].list_lock); + priv_timer[lcore_id].prev_lcore = lcore_id; + } +} + +/* Initialize the timer handle tim for use */ +void +rte_timer_init(struct rte_timer *tim) +{ + union rte_timer_status status; + + status.state = RTE_TIMER_STOP; + status.owner = RTE_TIMER_NO_OWNER; + tim->status.u32 = status.u32; +} + +/* + * if timer is pending or stopped (or running on the same core than + * us), mark timer as configuring, and on success return the previous + * status of the timer + */ +static int +timer_set_config_state(struct rte_timer *tim, + union rte_timer_status *ret_prev_status) +{ + union rte_timer_status prev_status, status; + int success = 0; + unsigned lcore_id; + + lcore_id = rte_lcore_id(); + + /* wait that the timer is in correct status before update, + * and mark it as being configured */ + while (success == 0) { + prev_status.u32 = tim->status.u32; + + /* timer is running on another core, exit */ + if (prev_status.state == RTE_TIMER_RUNNING && + prev_status.owner != (uint16_t)lcore_id) + return -1; + + /* timer is being configured on another core */ + if (prev_status.state == RTE_TIMER_CONFIG) + return -1; + + /* here, we know that timer is stopped or pending, + * mark it atomically as being configured */ + status.state = RTE_TIMER_CONFIG; + status.owner = (int16_t)lcore_id; + success = rte_atomic32_cmpset(&tim->status.u32, + prev_status.u32, + status.u32); + } + + ret_prev_status->u32 = prev_status.u32; + return 0; +} + +/* + * if timer is pending, mark timer as running + */ +static int +timer_set_running_state(struct rte_timer *tim) +{ + union rte_timer_status prev_status, status; + unsigned lcore_id = rte_lcore_id(); + int success = 0; + + /* wait that the timer is in correct status before update, + * and mark it as running */ + while (success == 0) { + prev_status.u32 = tim->status.u32; + + /* timer is not pending anymore */ + if (prev_status.state != RTE_TIMER_PENDING) + return -1; + + /* here, we know that timer is stopped or pending, + * mark it atomically as beeing configured */ + status.state = RTE_TIMER_RUNNING; + status.owner = (int16_t)lcore_id; + success = rte_atomic32_cmpset(&tim->status.u32, + prev_status.u32, + status.u32); + } + + return 0; +} + +/* + * Return a skiplist level for a new entry. + * This probabalistically gives a level with p=1/4 that an entry at level n + * will also appear at level n+1. + */ +static uint32_t +timer_get_skiplist_level(unsigned curr_depth) +{ +#ifdef RTE_LIBRTE_TIMER_DEBUG + static uint32_t i, count = 0; + static uint32_t levels[MAX_SKIPLIST_DEPTH] = {0}; +#endif + + /* probability value is 1/4, i.e. all at level 0, 1 in 4 is at level 1, + * 1 in 16 at level 2, 1 in 64 at level 3, etc. Calculated using lowest + * bit position of a (pseudo)random number. + */ + uint32_t rand = rte_rand() & (UINT32_MAX - 1); + uint32_t level = rand == 0 ? MAX_SKIPLIST_DEPTH : (rte_bsf32(rand)-1) / 2; + + /* limit the levels used to one above our current level, so we don't, + * for instance, have a level 0 and a level 7 without anything between + */ + if (level > curr_depth) + level = curr_depth; + if (level >= MAX_SKIPLIST_DEPTH) + level = MAX_SKIPLIST_DEPTH-1; +#ifdef RTE_LIBRTE_TIMER_DEBUG + count ++; + levels[level]++; + if (count % 10000 == 0) + for (i = 0; i < MAX_SKIPLIST_DEPTH; i++) + printf("Level %u: %u\n", (unsigned)i, (unsigned)levels[i]); +#endif + return level; +} + +/* + * For a given time value, get the entries at each level which + * are <= that time value. + */ +static void +timer_get_prev_entries(uint64_t time_val, unsigned tim_lcore, + struct rte_timer **prev) +{ + unsigned lvl = priv_timer[tim_lcore].curr_skiplist_depth; + prev[lvl] = &priv_timer[tim_lcore].pending_head; + while(lvl != 0) { + lvl--; + prev[lvl] = prev[lvl+1]; + while (prev[lvl]->sl_next[lvl] && + prev[lvl]->sl_next[lvl]->expire <= time_val) + prev[lvl] = prev[lvl]->sl_next[lvl]; + } +} + +/* + * Given a timer node in the skiplist, find the previous entries for it at + * all skiplist levels. + */ +static void +timer_get_prev_entries_for_node(struct rte_timer *tim, unsigned tim_lcore, + struct rte_timer **prev) +{ + int i; + /* to get a specific entry in the list, look for just lower than the time + * values, and then increment on each level individually if necessary + */ + timer_get_prev_entries(tim->expire - 1, tim_lcore, prev); + for (i = priv_timer[tim_lcore].curr_skiplist_depth - 1; i >= 0; i--) { + while (prev[i]->sl_next[i] != NULL && + prev[i]->sl_next[i] != tim && + prev[i]->sl_next[i]->expire <= tim->expire) + prev[i] = prev[i]->sl_next[i]; + } +} + +/* + * add in list, lock if needed + * timer must be in config state + * timer must not be in a list + */ +static void +timer_add(struct rte_timer *tim, unsigned tim_lcore, int local_is_locked) +{ + unsigned lcore_id = rte_lcore_id(); + unsigned lvl; + struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1]; + + /* if timer needs to be scheduled on another core, we need to + * lock the list; if it is on local core, we need to lock if + * we are not called from rte_timer_manage() */ + if (tim_lcore != lcore_id || !local_is_locked) + rte_spinlock_lock(&priv_timer[tim_lcore].list_lock); + + /* find where exactly this element goes in the list of elements + * for each depth. */ + timer_get_prev_entries(tim->expire, tim_lcore, prev); + + /* now assign it a new level and add at that level */ + const unsigned tim_level = timer_get_skiplist_level( + priv_timer[tim_lcore].curr_skiplist_depth); + if (tim_level == priv_timer[tim_lcore].curr_skiplist_depth) + priv_timer[tim_lcore].curr_skiplist_depth++; + + lvl = tim_level; + while (lvl > 0) { + tim->sl_next[lvl] = prev[lvl]->sl_next[lvl]; + prev[lvl]->sl_next[lvl] = tim; + lvl--; + } + tim->sl_next[0] = prev[0]->sl_next[0]; + prev[0]->sl_next[0] = tim; + + /* save the lowest list entry into the expire field of the dummy hdr + * NOTE: this is not atomic on 32-bit*/ + priv_timer[tim_lcore].pending_head.expire = priv_timer[tim_lcore].\ + pending_head.sl_next[0]->expire; + + if (tim_lcore != lcore_id || !local_is_locked) + rte_spinlock_unlock(&priv_timer[tim_lcore].list_lock); +} + +/* + * del from list, lock if needed + * timer must be in config state + * timer must be in a list + */ +static void +timer_del(struct rte_timer *tim, union rte_timer_status prev_status, + int local_is_locked) +{ + unsigned lcore_id = rte_lcore_id(); + unsigned prev_owner = prev_status.owner; + int i; + struct rte_timer *prev[MAX_SKIPLIST_DEPTH+1]; + + /* if timer needs is pending another core, we need to lock the + * list; if it is on local core, we need to lock if we are not + * called from rte_timer_manage() */ + if (prev_owner != lcore_id || !local_is_locked) + rte_spinlock_lock(&priv_timer[prev_owner].list_lock); + + /* save the lowest list entry into the expire field of the dummy hdr. + * NOTE: this is not atomic on 32-bit */ + if (tim == priv_timer[prev_owner].pending_head.sl_next[0]) + priv_timer[prev_owner].pending_head.expire = + ((tim->sl_next[0] == NULL) ? 0 : tim->sl_next[0]->expire); + + /* adjust pointers from previous entries to point past this */ + timer_get_prev_entries_for_node(tim, prev_owner, prev); + for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) { + if (prev[i]->sl_next[i] == tim) + prev[i]->sl_next[i] = tim->sl_next[i]; + } + + /* in case we deleted last entry at a level, adjust down max level */ + for (i = priv_timer[prev_owner].curr_skiplist_depth - 1; i >= 0; i--) + if (priv_timer[prev_owner].pending_head.sl_next[i] == NULL) + priv_timer[prev_owner].curr_skiplist_depth --; + else + break; + + if (prev_owner != lcore_id || !local_is_locked) + rte_spinlock_unlock(&priv_timer[prev_owner].list_lock); +} + +/* Reset and start the timer associated with the timer handle (private func) */ +static int +__rte_timer_reset(struct rte_timer *tim, uint64_t expire, + uint64_t period, unsigned tim_lcore, + rte_timer_cb_t fct, void *arg, + int local_is_locked) +{ + union rte_timer_status prev_status, status; + int ret; + unsigned lcore_id = rte_lcore_id(); + + /* round robin for tim_lcore */ + if (tim_lcore == (unsigned)LCORE_ID_ANY) { + if (lcore_id < RTE_MAX_LCORE) { + /* EAL thread with valid lcore_id */ + tim_lcore = rte_get_next_lcore( + priv_timer[lcore_id].prev_lcore, + 0, 1); + priv_timer[lcore_id].prev_lcore = tim_lcore; + } else + /* non-EAL thread do not run rte_timer_manage(), + * so schedule the timer on the first enabled lcore. */ + tim_lcore = rte_get_next_lcore(LCORE_ID_ANY, 0, 1); + } + + /* wait that the timer is in correct status before update, + * and mark it as being configured */ + ret = timer_set_config_state(tim, &prev_status); + if (ret < 0) + return -1; + + __TIMER_STAT_ADD(reset, 1); + if (prev_status.state == RTE_TIMER_RUNNING && + lcore_id < RTE_MAX_LCORE) { + priv_timer[lcore_id].updated = 1; + } + + /* remove it from list */ + if (prev_status.state == RTE_TIMER_PENDING) { + timer_del(tim, prev_status, local_is_locked); + __TIMER_STAT_ADD(pending, -1); + } + + tim->period = period; + tim->expire = expire; + tim->f = fct; + tim->arg = arg; + + __TIMER_STAT_ADD(pending, 1); + timer_add(tim, tim_lcore, local_is_locked); + + /* update state: as we are in CONFIG state, only us can modify + * the state so we don't need to use cmpset() here */ + rte_wmb(); + status.state = RTE_TIMER_PENDING; + status.owner = (int16_t)tim_lcore; + tim->status.u32 = status.u32; + + return 0; +} + +/* Reset and start the timer associated with the timer handle tim */ +int +rte_timer_reset(struct rte_timer *tim, uint64_t ticks, + enum rte_timer_type type, unsigned tim_lcore, + rte_timer_cb_t fct, void *arg) +{ + uint64_t cur_time = rte_get_timer_cycles(); + uint64_t period; + + if (unlikely((tim_lcore != (unsigned)LCORE_ID_ANY) && + !rte_lcore_is_enabled(tim_lcore))) + return -1; + + if (type == PERIODICAL) + period = ticks; + else + period = 0; + + return __rte_timer_reset(tim, cur_time + ticks, period, tim_lcore, + fct, arg, 0); +} + +/* loop until rte_timer_reset() succeed */ +void +rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks, + enum rte_timer_type type, unsigned tim_lcore, + rte_timer_cb_t fct, void *arg) +{ + while (rte_timer_reset(tim, ticks, type, tim_lcore, + fct, arg) != 0) + rte_pause(); +} + +/* Stop the timer associated with the timer handle tim */ +int +rte_timer_stop(struct rte_timer *tim) +{ + union rte_timer_status prev_status, status; + unsigned lcore_id = rte_lcore_id(); + int ret; + + /* wait that the timer is in correct status before update, + * and mark it as being configured */ + ret = timer_set_config_state(tim, &prev_status); + if (ret < 0) + return -1; + + __TIMER_STAT_ADD(stop, 1); + if (prev_status.state == RTE_TIMER_RUNNING && + lcore_id < RTE_MAX_LCORE) { + priv_timer[lcore_id].updated = 1; + } + + /* remove it from list */ + if (prev_status.state == RTE_TIMER_PENDING) { + timer_del(tim, prev_status, 0); + __TIMER_STAT_ADD(pending, -1); + } + + /* mark timer as stopped */ + rte_wmb(); + status.state = RTE_TIMER_STOP; + status.owner = RTE_TIMER_NO_OWNER; + tim->status.u32 = status.u32; + + return 0; +} + +/* loop until rte_timer_stop() succeed */ +void +rte_timer_stop_sync(struct rte_timer *tim) +{ + while (rte_timer_stop(tim) != 0) + rte_pause(); +} + +/* Test the PENDING status of the timer handle tim */ +int +rte_timer_pending(struct rte_timer *tim) +{ + return tim->status.state == RTE_TIMER_PENDING; +} + +/* must be called periodically, run all timer that expired */ +void rte_timer_manage(void) +{ + union rte_timer_status status; + struct rte_timer *tim, *next_tim; + struct rte_timer *run_first_tim, **pprev; + unsigned lcore_id = rte_lcore_id(); + struct rte_timer *prev[MAX_SKIPLIST_DEPTH + 1]; + uint64_t cur_time; + int i, ret; + + /* timer manager only runs on EAL thread with valid lcore_id */ + assert(lcore_id < RTE_MAX_LCORE); + + __TIMER_STAT_ADD(manage, 1); + /* optimize for the case where per-cpu list is empty */ + if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) + return; + cur_time = rte_get_timer_cycles(); + +#ifdef RTE_ARCH_X86_64 + /* on 64-bit the value cached in the pending_head.expired will be + * updated atomically, so we can consult that for a quick check here + * outside the lock */ + if (likely(priv_timer[lcore_id].pending_head.expire > cur_time)) + return; +#endif + + /* browse ordered list, add expired timers in 'expired' list */ + rte_spinlock_lock(&priv_timer[lcore_id].list_lock); + + /* if nothing to do just unlock and return */ + if (priv_timer[lcore_id].pending_head.sl_next[0] == NULL || + priv_timer[lcore_id].pending_head.sl_next[0]->expire > cur_time) { + rte_spinlock_unlock(&priv_timer[lcore_id].list_lock); + return; + } + + /* save start of list of expired timers */ + tim = priv_timer[lcore_id].pending_head.sl_next[0]; + + /* break the existing list at current time point */ + timer_get_prev_entries(cur_time, lcore_id, prev); + for (i = priv_timer[lcore_id].curr_skiplist_depth -1; i >= 0; i--) { + priv_timer[lcore_id].pending_head.sl_next[i] = + prev[i]->sl_next[i]; + if (prev[i]->sl_next[i] == NULL) + priv_timer[lcore_id].curr_skiplist_depth--; + prev[i] ->sl_next[i] = NULL; + } + + /* transition run-list from PENDING to RUNNING */ + run_first_tim = tim; + pprev = &run_first_tim; + + for ( ; tim != NULL; tim = next_tim) { + next_tim = tim->sl_next[0]; + + ret = timer_set_running_state(tim); + if (likely(ret == 0)) { + pprev = &tim->sl_next[0]; + } else { + /* another core is trying to re-config this one, + * remove it from local expired list and put it + * back on the priv_timer[] skip list */ + *pprev = next_tim; + timer_add(tim, lcore_id, 1); + } + } + + /* update the next to expire timer value */ + priv_timer[lcore_id].pending_head.expire = + (priv_timer[lcore_id].pending_head.sl_next[0] == NULL) ? 0 : + priv_timer[lcore_id].pending_head.sl_next[0]->expire; + + rte_spinlock_unlock(&priv_timer[lcore_id].list_lock); + + /* now scan expired list and call callbacks */ + for (tim = run_first_tim; tim != NULL; tim = next_tim) { + next_tim = tim->sl_next[0]; + priv_timer[lcore_id].updated = 0; + + /* execute callback function with list unlocked */ + tim->f(tim, tim->arg); + + __TIMER_STAT_ADD(pending, -1); + /* the timer was stopped or reloaded by the callback + * function, we have nothing to do here */ + if (priv_timer[lcore_id].updated == 1) + continue; + + if (tim->period == 0) { + /* remove from done list and mark timer as stopped */ + status.state = RTE_TIMER_STOP; + status.owner = RTE_TIMER_NO_OWNER; + rte_wmb(); + tim->status.u32 = status.u32; + } + else { + /* keep it in list and mark timer as pending */ + rte_spinlock_lock(&priv_timer[lcore_id].list_lock); + status.state = RTE_TIMER_PENDING; + __TIMER_STAT_ADD(pending, 1); + status.owner = (int16_t)lcore_id; + rte_wmb(); + tim->status.u32 = status.u32; + __rte_timer_reset(tim, cur_time + tim->period, + tim->period, lcore_id, tim->f, tim->arg, 1); + rte_spinlock_unlock(&priv_timer[lcore_id].list_lock); + } + } +} + +/* dump statistics about timers */ +void rte_timer_dump_stats(FILE *f) +{ +#ifdef RTE_LIBRTE_TIMER_DEBUG + struct rte_timer_debug_stats sum; + unsigned lcore_id; + + memset(&sum, 0, sizeof(sum)); + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + sum.reset += priv_timer[lcore_id].stats.reset; + sum.stop += priv_timer[lcore_id].stats.stop; + sum.manage += priv_timer[lcore_id].stats.manage; + sum.pending += priv_timer[lcore_id].stats.pending; + } + fprintf(f, "Timer statistics:\n"); + fprintf(f, " reset = %"PRIu64"\n", sum.reset); + fprintf(f, " stop = %"PRIu64"\n", sum.stop); + fprintf(f, " manage = %"PRIu64"\n", sum.manage); + fprintf(f, " pending = %"PRIu64"\n", sum.pending); +#else + fprintf(f, "No timer statistics, RTE_LIBRTE_TIMER_DEBUG is disabled\n"); +#endif +} diff --git a/lib/librte_timer/rte_timer.h b/lib/librte_timer/rte_timer.h new file mode 100644 index 00000000..77547c6b --- /dev/null +++ b/lib/librte_timer/rte_timer.h @@ -0,0 +1,335 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_TIMER_H_ +#define _RTE_TIMER_H_ + +/** + * @file + RTE Timer + * + * This library provides a timer service to RTE Data Plane execution + * units that allows the execution of callback functions asynchronously. + * + * - Timers can be periodic or single (one-shot). + * - The timers can be loaded from one core and executed on another. This has + * to be specified in the call to rte_timer_reset(). + * - High precision is possible. NOTE: this depends on the call frequency to + * rte_timer_manage() that check the timer expiration for the local core. + * - If not used in an application, for improved performance, it can be + * disabled at compilation time by not calling the rte_timer_manage() + * to improve performance. + * + * The timer library uses the rte_get_hpet_cycles() function that + * uses the HPET, when available, to provide a reliable time reference. [HPET + * routines are provided by EAL, which falls back to using the chip TSC (time- + * stamp counter) as fallback when HPET is not available] + * + * This library provides an interface to add, delete and restart a + * timer. The API is based on the BSD callout(9) API with a few + * differences. + * + * See the RTE architecture documentation for more information about the + * design of this library. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_TIMER_STOP 0 /**< State: timer is stopped. */ +#define RTE_TIMER_PENDING 1 /**< State: timer is scheduled. */ +#define RTE_TIMER_RUNNING 2 /**< State: timer function is running. */ +#define RTE_TIMER_CONFIG 3 /**< State: timer is being configured. */ + +#define RTE_TIMER_NO_OWNER -2 /**< Timer has no owner. */ + +/** + * Timer type: Periodic or single (one-shot). + */ +enum rte_timer_type { + SINGLE, + PERIODICAL +}; + +/** + * Timer status: A union of the state (stopped, pending, running, + * config) and an owner (the id of the lcore that owns the timer). + */ +union rte_timer_status { + struct { + uint16_t state; /**< Stop, pending, running, config. */ + int16_t owner; /**< The lcore that owns the timer. */ + }; + uint32_t u32; /**< To atomic-set status + owner. */ +}; + +#ifdef RTE_LIBRTE_TIMER_DEBUG +/** + * A structure that stores the timer statistics (per-lcore). + */ +struct rte_timer_debug_stats { + uint64_t reset; /**< Number of success calls to rte_timer_reset(). */ + uint64_t stop; /**< Number of success calls to rte_timer_stop(). */ + uint64_t manage; /**< Number of calls to rte_timer_manage(). */ + uint64_t pending; /**< Number of pending/running timers. */ +}; +#endif + +struct rte_timer; + +/** + * Callback function type for timer expiry. + */ +typedef void (*rte_timer_cb_t)(struct rte_timer *, void *); + +#define MAX_SKIPLIST_DEPTH 10 + +/** + * A structure describing a timer in RTE. + */ +struct rte_timer +{ + uint64_t expire; /**< Time when timer expire. */ + struct rte_timer *sl_next[MAX_SKIPLIST_DEPTH]; + volatile union rte_timer_status status; /**< Status of timer. */ + uint64_t period; /**< Period of timer (0 if not periodic). */ + rte_timer_cb_t f; /**< Callback function. */ + void *arg; /**< Argument to callback function. */ +}; + + +#ifdef __cplusplus +/** + * A C++ static initializer for a timer structure. + */ +#define RTE_TIMER_INITIALIZER { \ + 0, \ + {NULL}, \ + {{RTE_TIMER_STOP, RTE_TIMER_NO_OWNER}}, \ + 0, \ + NULL, \ + NULL, \ + } +#else +/** + * A static initializer for a timer structure. + */ +#define RTE_TIMER_INITIALIZER { \ + .status = {{ \ + .state = RTE_TIMER_STOP, \ + .owner = RTE_TIMER_NO_OWNER, \ + }}, \ + } +#endif + +/** + * Initialize the timer library. + * + * Initializes internal variables (list, locks and so on) for the RTE + * timer library. + */ +void rte_timer_subsystem_init(void); + +/** + * Initialize a timer handle. + * + * The rte_timer_init() function initializes the timer handle *tim* + * for use. No operations can be performed on a timer before it is + * initialized. + * + * @param tim + * The timer to initialize. + */ +void rte_timer_init(struct rte_timer *tim); + +/** + * Reset and start the timer associated with the timer handle. + * + * The rte_timer_reset() function resets and starts the timer + * associated with the timer handle *tim*. When the timer expires after + * *ticks* HPET cycles, the function specified by *fct* will be called + * with the argument *arg* on core *tim_lcore*. + * + * If the timer associated with the timer handle is already running + * (in the RUNNING state), the function will fail. The user has to check + * the return value of the function to see if there is a chance that the + * timer is in the RUNNING state. + * + * If the timer is being configured on another core (the CONFIG state), + * it will also fail. + * + * If the timer is pending or stopped, it will be rescheduled with the + * new parameters. + * + * @param tim + * The timer handle. + * @param ticks + * The number of cycles (see rte_get_hpet_hz()) before the callback + * function is called. + * @param type + * The type can be either: + * - PERIODICAL: The timer is automatically reloaded after execution + * (returns to the PENDING state) + * - SINGLE: The timer is one-shot, that is, the timer goes to a + * STOPPED state after execution. + * @param tim_lcore + * The ID of the lcore where the timer callback function has to be + * executed. If tim_lcore is LCORE_ID_ANY, the timer library will + * launch it on a different core for each call (round-robin). + * @param fct + * The callback function of the timer. + * @param arg + * The user argument of the callback function. + * @return + * - 0: Success; the timer is scheduled. + * - (-1): Timer is in the RUNNING or CONFIG state. + */ +int rte_timer_reset(struct rte_timer *tim, uint64_t ticks, + enum rte_timer_type type, unsigned tim_lcore, + rte_timer_cb_t fct, void *arg); + + +/** + * Loop until rte_timer_reset() succeeds. + * + * Reset and start the timer associated with the timer handle. Always + * succeed. See rte_timer_reset() for details. + * + * @param tim + * The timer handle. + * @param ticks + * The number of cycles (see rte_get_hpet_hz()) before the callback + * function is called. + * @param type + * The type can be either: + * - PERIODICAL: The timer is automatically reloaded after execution + * (returns to the PENDING state) + * - SINGLE: The timer is one-shot, that is, the timer goes to a + * STOPPED state after execution. + * @param tim_lcore + * The ID of the lcore where the timer callback function has to be + * executed. If tim_lcore is LCORE_ID_ANY, the timer library will + * launch it on a different core for each call (round-robin). + * @param fct + * The callback function of the timer. + * @param arg + * The user argument of the callback function. + */ +void +rte_timer_reset_sync(struct rte_timer *tim, uint64_t ticks, + enum rte_timer_type type, unsigned tim_lcore, + rte_timer_cb_t fct, void *arg); + +/** + * Stop a timer. + * + * The rte_timer_stop() function stops the timer associated with the + * timer handle *tim*. It may fail if the timer is currently running or + * being configured. + * + * If the timer is pending or stopped (for instance, already expired), + * the function will succeed. The timer handle tim must have been + * initialized using rte_timer_init(), otherwise, undefined behavior + * will occur. + * + * This function can be called safely from a timer callback. If it + * succeeds, the timer is not referenced anymore by the timer library + * and the timer structure can be freed (even in the callback + * function). + * + * @param tim + * The timer handle. + * @return + * - 0: Success; the timer is stopped. + * - (-1): The timer is in the RUNNING or CONFIG state. + */ +int rte_timer_stop(struct rte_timer *tim); + + +/** + * Loop until rte_timer_stop() succeeds. + * + * After a call to this function, the timer identified by *tim* is + * stopped. See rte_timer_stop() for details. + * + * @param tim + * The timer handle. + */ +void rte_timer_stop_sync(struct rte_timer *tim); + +/** + * Test if a timer is pending. + * + * The rte_timer_pending() function tests the PENDING status + * of the timer handle *tim*. A PENDING timer is one that has been + * scheduled and whose function has not yet been called. + * + * @param tim + * The timer handle. + * @return + * - 0: The timer is not pending. + * - 1: The timer is pending. + */ +int rte_timer_pending(struct rte_timer *tim); + +/** + * Manage the timer list and execute callback functions. + * + * This function must be called periodically from EAL lcores + * main_loop(). It browses the list of pending timers and runs all + * timers that are expired. + * + * The precision of the timer depends on the call frequency of this + * function. However, the more often the function is called, the more + * CPU resources it will use. + */ +void rte_timer_manage(void); + +/** + * Dump statistics about timers. + * + * @param f + * A pointer to a file for output + */ +void rte_timer_dump_stats(FILE *f); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_TIMER_H_ */ diff --git a/lib/librte_timer/rte_timer_version.map b/lib/librte_timer/rte_timer_version.map new file mode 100644 index 00000000..9b2e4b86 --- /dev/null +++ b/lib/librte_timer/rte_timer_version.map @@ -0,0 +1,15 @@ +DPDK_2.0 { + global: + + rte_timer_dump_stats; + rte_timer_init; + rte_timer_manage; + rte_timer_pending; + rte_timer_reset; + rte_timer_reset_sync; + rte_timer_stop; + rte_timer_stop_sync; + rte_timer_subsystem_init; + + local: *; +}; diff --git a/lib/librte_vhost/Makefile b/lib/librte_vhost/Makefile new file mode 100644 index 00000000..e33ff53e --- /dev/null +++ b/lib/librte_vhost/Makefile @@ -0,0 +1,71 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# library name +LIB = librte_vhost.a + +EXPORT_MAP := rte_vhost_version.map + +LIBABIVER := 2 + +CFLAGS += $(WERROR_FLAGS) -I$(SRCDIR) -O3 -D_FILE_OFFSET_BITS=64 +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) +CFLAGS += -I vhost_user +LDLIBS += -lpthread +else +CFLAGS += -I vhost_cuse +LDLIBS += -lfuse +endif + +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_NUMA),y) +LDLIBS += -lnuma +endif + +# all source are stored in SRCS-y +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) := virtio-net.c vhost_rxtx.c +ifeq ($(CONFIG_RTE_LIBRTE_VHOST_USER),y) +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_user/vhost-net-user.c vhost_user/virtio-net-user.c vhost_user/fd_man.c +else +SRCS-$(CONFIG_RTE_LIBRTE_VHOST) += vhost_cuse/vhost-net-cdev.c vhost_cuse/virtio-net-cdev.c vhost_cuse/eventfd_copy.c +endif + +# install includes +SYMLINK-$(CONFIG_RTE_LIBRTE_VHOST)-include += rte_virtio_net.h + +# dependencies +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_eal +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_ether +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_mbuf +DEPDIRS-$(CONFIG_RTE_LIBRTE_VHOST) += lib/librte_net + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_vhost/eventfd_link/Makefile b/lib/librte_vhost/eventfd_link/Makefile new file mode 100644 index 00000000..3140e8bf --- /dev/null +++ b/lib/librte_vhost/eventfd_link/Makefile @@ -0,0 +1,41 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +RTE_KERNELDIR ?= /lib/modules/$(shell uname -r)/build + +obj-m += eventfd_link.o + + +all: + make -C $(RTE_KERNELDIR) M=$(PWD) modules + +clean: + make -C $(RTE_KERNELDIR) M=$(PWD) clean diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.c b/lib/librte_vhost/eventfd_link/eventfd_link.c new file mode 100644 index 00000000..4b05b5a8 --- /dev/null +++ b/lib/librte_vhost/eventfd_link/eventfd_link.c @@ -0,0 +1,277 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include +#include + +#include "eventfd_link.h" + + +/* + * get_files_struct is copied from fs/file.c + */ +struct files_struct * +get_files_struct(struct task_struct *task) +{ + struct files_struct *files; + + task_lock(task); + files = task->files; + if (files) + atomic_inc(&files->count); + task_unlock(task); + + return files; +} + +/* + * put_files_struct is extracted from fs/file.c + */ +void +put_files_struct(struct files_struct *files) +{ + if (atomic_dec_and_test(&files->count)) + BUG(); +} + +static struct file * +fget_from_files(struct files_struct *files, unsigned fd) +{ + struct file *file; + + rcu_read_lock(); + file = fcheck_files(files, fd); + if (file) { + if (file->f_mode & FMODE_PATH || + !atomic_long_inc_not_zero(&file->f_count)) { + + file = NULL; + } + } + rcu_read_unlock(); + + return file; +} + +static long +eventfd_link_ioctl_copy2(unsigned long arg) +{ + void __user *argp = (void __user *) arg; + struct task_struct *task_target = NULL; + struct file *file; + struct files_struct *files; + struct eventfd_copy2 eventfd_copy2; + long ret = -EFAULT; + + if (copy_from_user(&eventfd_copy2, argp, sizeof(struct eventfd_copy2))) + goto out; + + /* + * Find the task struct for the target pid + */ + ret = -ESRCH; + + task_target = + get_pid_task(find_vpid(eventfd_copy2.pid), PIDTYPE_PID); + if (task_target == NULL) { + pr_info("Unable to find pid %d\n", eventfd_copy2.pid); + goto out; + } + + ret = -ESTALE; + files = get_files_struct(task_target); + if (files == NULL) { + pr_info("Failed to get target files struct\n"); + goto out_task; + } + + ret = -EBADF; + file = fget_from_files(files, eventfd_copy2.fd); + put_files_struct(files); + + if (file == NULL) { + pr_info("Failed to get fd %d from target\n", eventfd_copy2.fd); + goto out_task; + } + + /* + * Install the file struct from the target process into the + * newly allocated file desciptor of the source process. + */ + ret = get_unused_fd_flags(eventfd_copy2.flags); + if (ret < 0) { + fput(file); + goto out_task; + } + fd_install(ret, file); + +out_task: + put_task_struct(task_target); +out: + return ret; +} + +static long +eventfd_link_ioctl_copy(unsigned long arg) +{ + void __user *argp = (void __user *) arg; + struct task_struct *task_target = NULL; + struct file *file; + struct files_struct *files; + struct fdtable *fdt; + struct eventfd_copy eventfd_copy; + long ret = -EFAULT; + + if (copy_from_user(&eventfd_copy, argp, sizeof(struct eventfd_copy))) + goto out; + + /* + * Find the task struct for the target pid + */ + ret = -ESRCH; + + task_target = + get_pid_task(find_vpid(eventfd_copy.target_pid), PIDTYPE_PID); + if (task_target == NULL) { + pr_info("Unable to find pid %d\n", eventfd_copy.target_pid); + goto out; + } + + ret = -ESTALE; + files = get_files_struct(current); + if (files == NULL) { + pr_info("Failed to get current files struct\n"); + goto out_task; + } + + ret = -EBADF; + file = fget_from_files(files, eventfd_copy.source_fd); + + if (file == NULL) { + pr_info("Failed to get fd %d from source\n", + eventfd_copy.source_fd); + put_files_struct(files); + goto out_task; + } + + /* + * Release the existing eventfd in the source process + */ + spin_lock(&files->file_lock); + fput(file); + filp_close(file, files); + fdt = files_fdtable(files); + fdt->fd[eventfd_copy.source_fd] = NULL; + spin_unlock(&files->file_lock); + + put_files_struct(files); + + /* + * Find the file struct associated with the target fd. + */ + + ret = -ESTALE; + files = get_files_struct(task_target); + if (files == NULL) { + pr_info("Failed to get target files struct\n"); + goto out_task; + } + + ret = -EBADF; + file = fget_from_files(files, eventfd_copy.target_fd); + put_files_struct(files); + + if (file == NULL) { + pr_info("Failed to get fd %d from target\n", + eventfd_copy.target_fd); + goto out_task; + } + + /* + * Install the file struct from the target process into the + * file desciptor of the source process, + */ + + fd_install(eventfd_copy.source_fd, file); + ret = 0; + +out_task: + put_task_struct(task_target); +out: + return ret; +} + +static long +eventfd_link_ioctl(struct file *f, unsigned int ioctl, unsigned long arg) +{ + long ret = -ENOIOCTLCMD; + + switch (ioctl) { + case EVENTFD_COPY: + ret = eventfd_link_ioctl_copy(arg); + break; + case EVENTFD_COPY2: + ret = eventfd_link_ioctl_copy2(arg); + break; + } + + return ret; +} + +static const struct file_operations eventfd_link_fops = { + .owner = THIS_MODULE, + .unlocked_ioctl = eventfd_link_ioctl, +}; + + +static struct miscdevice eventfd_link_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = "eventfd-link", + .fops = &eventfd_link_fops, +}; + +static int __init +eventfd_link_init(void) +{ + return misc_register(&eventfd_link_misc); +} + +module_init(eventfd_link_init); + +static void __exit +eventfd_link_exit(void) +{ + misc_deregister(&eventfd_link_misc); +} + +module_exit(eventfd_link_exit); + +MODULE_VERSION("0.0.1"); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Anthony Fee"); +MODULE_DESCRIPTION("Link eventfd"); +MODULE_ALIAS("devname:eventfd-link"); diff --git a/lib/librte_vhost/eventfd_link/eventfd_link.h b/lib/librte_vhost/eventfd_link/eventfd_link.h new file mode 100644 index 00000000..5ebc20b8 --- /dev/null +++ b/lib/librte_vhost/eventfd_link/eventfd_link.h @@ -0,0 +1,94 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _EVENTFD_LINK_H_ +#define _EVENTFD_LINK_H_ + +/* + * arguements for the EVENTFD_COPY ioctl + */ +struct eventfd_copy { + unsigned target_fd; /* fd in the target pid */ + unsigned source_fd; /* fd in the calling pid */ + pid_t target_pid; /* pid of the target pid */ +}; + +/* + * ioctl to copy an fd entry in calling process to an fd in a target process + * NOTE: this one should be + * #define EVENTFD_COPY _IOWR('D', 1, struct eventfd_copy) actually + */ +#define EVENTFD_COPY 1 + +/* + * arguments for the EVENTFD_COPY2 ioctl + */ +struct eventfd_copy2 { + unsigned fd; /* fd to steal */ + pid_t pid; /* pid of the process to steal from */ + unsigned flags; /* flags to allocate new fd with */ +}; + +/* + * ioctl to copy an fd entry from the target process into newly allocated + * fd in the calling process + */ +#define EVENTFD_COPY2 _IOW('D', 2, struct eventfd_copy2) + +#endif /* _EVENTFD_LINK_H_ */ diff --git a/lib/librte_vhost/libvirt/qemu-wrap.py b/lib/librte_vhost/libvirt/qemu-wrap.py new file mode 100755 index 00000000..e6a2cc9d --- /dev/null +++ b/lib/librte_vhost/libvirt/qemu-wrap.py @@ -0,0 +1,387 @@ +#!/usr/bin/python +#/* +# * BSD LICENSE +# * +# * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# * All rights reserved. +# * +# * Redistribution and use in source and binary forms, with or without +# * modification, are permitted provided that the following conditions +# * are met: +# * +# * * Redistributions of source code must retain the above copyright +# * notice, this list of conditions and the following disclaimer. +# * * Redistributions in binary form must reproduce the above copyright +# * notice, this list of conditions and the following disclaimer in +# * the documentation and/or other materials provided with the +# * distribution. +# * * Neither the name of Intel Corporation nor the names of its +# * contributors may be used to endorse or promote products derived +# * from this software without specific prior written permission. +# * +# * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# */ + +##################################################################### +# This script is designed to modify the call to the QEMU emulator +# to support userspace vhost when starting a guest machine through +# libvirt with vhost enabled. The steps to enable this are as follows +# and should be run as root: +# +# 1. Place this script in a libvirtd's binary search PATH ($PATH) +# A good location would be in the same directory that the QEMU +# binary is located +# +# 2. Ensure that the script has the same owner/group and file +# permissions as the QEMU binary +# +# 3. Update the VM xml file using "virsh edit VM.xml" +# +# 3.a) Set the VM to use the launch script +# +# Set the emulator path contained in the +# tags +# +# e.g replace /usr/bin/qemu-kvm +# with /usr/bin/qemu-wrap.py +# +# 3.b) Set the VM's device's to use vhost-net offload +# +# +# +# +# +# +# 4. Enable libvirt to access our userpace device file by adding it to +# controllers cgroup for libvirtd using the following steps +# +# 4.a) In /etc/libvirt/qemu.conf add/edit the following lines: +# 1) cgroup_controllers = [ ... "devices", ... ] +# 2) clear_emulator_capabilities = 0 +# 3) user = "root" +# 4) group = "root" +# 5) cgroup_device_acl = [ +# "/dev/null", "/dev/full", "/dev/zero", +# "/dev/random", "/dev/urandom", +# "/dev/ptmx", "/dev/kvm", "/dev/kqemu", +# "/dev/rtc", "/dev/hpet", "/dev/net/tun", +# "/dev/", +# "/dev/hugepages", +# ] +# +# 4.b) Disable SELinux or set to permissive mode +# +# 4.c) Mount cgroup device controller +# "mkdir /dev/cgroup" +# "mount -t cgroup none /dev/cgroup -o devices" +# +# 4.d) Set hugetlbfs_mount variable - ( Optional ) +# VMs using userspace vhost must use hugepage backed +# memory. This can be enabled in the libvirt XML +# config by adding a memory backing section to the +# XML config e.g. +# +# +# +# This memory backing section should be added after the +# and sections. This will add +# flags "-mem-prealloc -mem-path " to the QEMU +# command line. The hugetlbfs_mount variable can be used +# to override the default passed through by libvirt. +# +# if "-mem-prealloc" or "-mem-path " are not passed +# through and a vhost device is detected then these options will +# be automatically added by this script. This script will detect +# the system hugetlbfs mount point to be used for . The +# default for this script can be overidden by the +# hugetlbfs_dir variable in the configuration section of this script. +# +# +# 4.e) Restart the libvirtd system process +# e.g. on Fedora "systemctl restart libvirtd.service" +# +# +# 4.f) Edit the Configuration Parameters section of this script +# to point to the correct emulator location and set any +# addition options +# +# The script modifies the libvirtd Qemu call by modifying/adding +# options based on the configuration parameters below. +# NOTE: +# emul_path and us_vhost_path must be set +# All other parameters are optional +##################################################################### + + +############################################# +# Configuration Parameters +############################################# +#Path to QEMU binary +emul_path = "/usr/local/bin/qemu-system-x86_64" + +#Path to userspace vhost device file +# This filename should match the --dev-basename parameters of +# the command used to launch the userspace vhost sample application e.g. +# if the sample app lauch command is: +# ./build/vhost-switch ..... --dev-basename usvhost +# then this variable should be set to: +# us_vhost_path = "/dev/usvhost" +us_vhost_path = "/dev/usvhost" + +#List of additional user defined emulation options. These options will +#be added to all Qemu calls +emul_opts_user = [] + +#List of additional user defined emulation options for vhost only. +#These options will only be added to vhost enabled guests +emul_opts_user_vhost = [] + +#For all VHOST enabled VMs, the VM memory is preallocated from hugetlbfs +# Set this variable to one to enable this option for all VMs +use_huge_all = 0 + +#Instead of autodetecting, override the hugetlbfs directory by setting +#this variable +hugetlbfs_dir = "" + +############################################# + + +############################################# +# ****** Do Not Modify Below this Line ****** +############################################# + +import sys, os, subprocess +import time +import signal + + +#List of open userspace vhost file descriptors +fd_list = [] + +#additional virtio device flags when using userspace vhost +vhost_flags = [ "csum=off", + "gso=off", + "guest_tso4=off", + "guest_tso6=off", + "guest_ecn=off" + ] + +#String of the path to the Qemu process pid +qemu_pid = "/tmp/%d-qemu.pid" % os.getpid() + +############################################# +# Signal haldler to kill Qemu subprocess +############################################# +def kill_qemu_process(signum, stack): + pidfile = open(qemu_pid, 'r') + pid = int(pidfile.read()) + os.killpg(pid, signal.SIGTERM) + pidfile.close() + + +############################################# +# Find the system hugefile mount point. +# Note: +# if multiple hugetlbfs mount points exist +# then the first one found will be used +############################################# +def find_huge_mount(): + + if (len(hugetlbfs_dir)): + return hugetlbfs_dir + + huge_mount = "" + + if (os.access("/proc/mounts", os.F_OK)): + f = open("/proc/mounts", "r") + line = f.readline() + while line: + line_split = line.split(" ") + if line_split[2] == 'hugetlbfs': + huge_mount = line_split[1] + break + line = f.readline() + else: + print "/proc/mounts not found" + exit (1) + + f.close + if len(huge_mount) == 0: + print "Failed to find hugetlbfs mount point" + exit (1) + + return huge_mount + + +############################################# +# Get a userspace Vhost file descriptor +############################################# +def get_vhost_fd(): + + if (os.access(us_vhost_path, os.F_OK)): + fd = os.open( us_vhost_path, os.O_RDWR) + else: + print ("US-Vhost file %s not found" %us_vhost_path) + exit (1) + + return fd + + +############################################# +# Check for vhostfd. if found then replace +# with our own vhost fd and append any vhost +# flags onto the end +############################################# +def modify_netdev_arg(arg): + + global fd_list + vhost_in_use = 0 + s = '' + new_opts = [] + netdev_opts = arg.split(",") + + for opt in netdev_opts: + #check if vhost is used + if "vhost" == opt[:5]: + vhost_in_use = 1 + else: + new_opts.append(opt) + + #if using vhost append vhost options + if vhost_in_use == 1: + #append vhost on option + new_opts.append('vhost=on') + #append vhostfd ption + new_fd = get_vhost_fd() + new_opts.append('vhostfd=' + str(new_fd)) + fd_list.append(new_fd) + + #concatenate all options + for opt in new_opts: + if len(s) > 0: + s+=',' + + s+=opt + + return s + + +############################################# +# Main +############################################# +def main(): + + global fd_list + global vhost_in_use + new_args = [] + num_cmd_args = len(sys.argv) + emul_call = '' + mem_prealloc_set = 0 + mem_path_set = 0 + num = 0; + + #parse the parameters + while (num < num_cmd_args): + arg = sys.argv[num] + + #Check netdev +1 parameter for vhostfd + if arg == '-netdev': + num_vhost_devs = len(fd_list) + new_args.append(arg) + + num+=1 + arg = sys.argv[num] + mod_arg = modify_netdev_arg(arg) + new_args.append(mod_arg) + + #append vhost flags if this is a vhost device + # and -device is the next arg + # i.e -device -opt1,-opt2,...,-opt3,%vhost + if (num_vhost_devs < len(fd_list)): + num+=1 + arg = sys.argv[num] + if arg == '-device': + new_args.append(arg) + num+=1 + new_arg = sys.argv[num] + for flag in vhost_flags: + new_arg = ''.join([new_arg,',',flag]) + new_args.append(new_arg) + else: + new_args.append(arg) + elif arg == '-mem-prealloc': + mem_prealloc_set = 1 + new_args.append(arg) + elif arg == '-mem-path': + mem_path_set = 1 + new_args.append(arg) + + else: + new_args.append(arg) + + num+=1 + + #Set Qemu binary location + emul_call+=emul_path + emul_call+=" " + + #Add prealloc mem options if using vhost and not already added + if ((len(fd_list) > 0) and (mem_prealloc_set == 0)): + emul_call += "-mem-prealloc " + + #Add mempath mem options if using vhost and not already added + if ((len(fd_list) > 0) and (mem_path_set == 0)): + #Detect and add hugetlbfs mount point + mp = find_huge_mount() + mp = "".join(["-mem-path ", mp]) + emul_call += mp + emul_call += " " + + #add user options + for opt in emul_opts_user: + emul_call += opt + emul_call += " " + + #Add add user vhost only options + if len(fd_list) > 0: + for opt in emul_opts_user_vhost: + emul_call += opt + emul_call += " " + + #Add updated libvirt options + iter_args = iter(new_args) + #skip 1st arg i.e. call to this script + next(iter_args) + for arg in iter_args: + emul_call+=str(arg) + emul_call+= " " + + emul_call += "-pidfile %s " % qemu_pid + #Call QEMU + process = subprocess.Popen(emul_call, shell=True, preexec_fn=os.setsid) + + for sig in [signal.SIGTERM, signal.SIGINT, signal.SIGHUP, signal.SIGQUIT]: + signal.signal(sig, kill_qemu_process) + + process.wait() + + #Close usvhost files + for fd in fd_list: + os.close(fd) + #Cleanup temporary files + if os.access(qemu_pid, os.F_OK): + os.remove(qemu_pid) + +if __name__ == "__main__": + main() diff --git a/lib/librte_vhost/rte_vhost_version.map b/lib/librte_vhost/rte_vhost_version.map new file mode 100644 index 00000000..3d8709e5 --- /dev/null +++ b/lib/librte_vhost/rte_vhost_version.map @@ -0,0 +1,22 @@ +DPDK_2.0 { + global: + + rte_vhost_dequeue_burst; + rte_vhost_driver_callback_register; + rte_vhost_driver_register; + rte_vhost_driver_session_start; + rte_vhost_enable_guest_notification; + rte_vhost_enqueue_burst; + rte_vhost_feature_disable; + rte_vhost_feature_enable; + rte_vhost_feature_get; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_vhost_driver_unregister; + +} DPDK_2.0; diff --git a/lib/librte_vhost/rte_virtio_net.h b/lib/librte_vhost/rte_virtio_net.h new file mode 100644 index 00000000..600b20b4 --- /dev/null +++ b/lib/librte_vhost/rte_virtio_net.h @@ -0,0 +1,286 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_NET_H_ +#define _VIRTIO_NET_H_ + +/** + * @file + * Interface to vhost net + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +struct rte_mbuf; + +#define VHOST_MEMORY_MAX_NREGIONS 8 + +/* Used to indicate that the device is running on a data core */ +#define VIRTIO_DEV_RUNNING 1 + +/* Backend value set by guest. */ +#define VIRTIO_DEV_STOPPED -1 + + +/* Enum for virtqueue management. */ +enum {VIRTIO_RXQ, VIRTIO_TXQ, VIRTIO_QNUM}; + +#define BUF_VECTOR_MAX 256 + +/** + * Structure contains buffer address, length and descriptor index + * from vring to do scatter RX. + */ +struct buf_vector { + uint64_t buf_addr; + uint32_t buf_len; + uint32_t desc_idx; +}; + +/** + * Structure contains variables relevant to RX/TX virtqueues. + */ +struct vhost_virtqueue { + struct vring_desc *desc; /**< Virtqueue descriptor ring. */ + struct vring_avail *avail; /**< Virtqueue available ring. */ + struct vring_used *used; /**< Virtqueue used ring. */ + uint32_t size; /**< Size of descriptor ring. */ + uint32_t backend; /**< Backend value to determine if device should started/stopped. */ + uint16_t vhost_hlen; /**< Vhost header length (varies depending on RX merge buffers. */ + volatile uint16_t last_used_idx; /**< Last index used on the available ring */ + volatile uint16_t last_used_idx_res; /**< Used for multiple devices reserving buffers. */ +#define VIRTIO_INVALID_EVENTFD (-1) +#define VIRTIO_UNINITIALIZED_EVENTFD (-2) + int callfd; /**< Used to notify the guest (trigger interrupt). */ + int kickfd; /**< Currently unused as polling mode is enabled. */ + int enabled; + uint64_t log_guest_addr; /**< Physical address of used ring, for logging */ + uint64_t reserved[15]; /**< Reserve some spaces for future extension. */ + struct buf_vector buf_vec[BUF_VECTOR_MAX]; /**< for scatter RX. */ +} __rte_cache_aligned; + +/* Old kernels have no such macro defined */ +#ifndef VIRTIO_NET_F_GUEST_ANNOUNCE + #define VIRTIO_NET_F_GUEST_ANNOUNCE 21 +#endif + + +/* + * Make an extra wrapper for VIRTIO_NET_F_MQ and + * VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX as they are + * introduced since kernel v3.8. This makes our + * code buildable for older kernel. + */ +#ifdef VIRTIO_NET_F_MQ + #define VHOST_MAX_QUEUE_PAIRS VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX + #define VHOST_SUPPORTS_MQ (1ULL << VIRTIO_NET_F_MQ) +#else + #define VHOST_MAX_QUEUE_PAIRS 1 + #define VHOST_SUPPORTS_MQ 0 +#endif + +/* + * Define virtio 1.0 for older kernels + */ +#ifndef VIRTIO_F_VERSION_1 + #define VIRTIO_F_VERSION_1 32 +#endif + +/** + * Device structure contains all configuration information relating to the device. + */ +struct virtio_net { + struct virtio_memory *mem; /**< QEMU memory and memory region information. */ + uint64_t features; /**< Negotiated feature set. */ + uint64_t protocol_features; /**< Negotiated protocol feature set. */ + uint64_t device_fh; /**< device identifier. */ + uint32_t flags; /**< Device flags. Only used to check if device is running on data core. */ +#define IF_NAME_SZ (PATH_MAX > IFNAMSIZ ? PATH_MAX : IFNAMSIZ) + char ifname[IF_NAME_SZ]; /**< Name of the tap device or socket path. */ + uint32_t virt_qp_nb; /**< number of queue pair we have allocated */ + void *priv; /**< private context */ + uint64_t log_size; /**< Size of log area */ + uint64_t log_base; /**< Where dirty pages are logged */ + struct ether_addr mac; /**< MAC address */ + rte_atomic16_t broadcast_rarp; /**< A flag to tell if we need broadcast rarp packet */ + uint64_t reserved[61]; /**< Reserve some spaces for future extension. */ + struct vhost_virtqueue *virtqueue[VHOST_MAX_QUEUE_PAIRS * 2]; /**< Contains all virtqueue information. */ +} __rte_cache_aligned; + +/** + * Information relating to memory regions including offsets to addresses in QEMUs memory file. + */ +struct virtio_memory_regions { + uint64_t guest_phys_address; /**< Base guest physical address of region. */ + uint64_t guest_phys_address_end; /**< End guest physical address of region. */ + uint64_t memory_size; /**< Size of region. */ + uint64_t userspace_address; /**< Base userspace address of region. */ + uint64_t address_offset; /**< Offset of region for address translation. */ +}; + + +/** + * Memory structure includes region and mapping information. + */ +struct virtio_memory { + uint64_t base_address; /**< Base QEMU userspace address of the memory file. */ + uint64_t mapped_address; /**< Mapped address of memory file base in our applications memory space. */ + uint64_t mapped_size; /**< Total size of memory file. */ + uint32_t nregions; /**< Number of memory regions. */ + struct virtio_memory_regions regions[0]; /**< Memory region information. */ +}; + +/** + * Device and vring operations. + * + * Make sure to set VIRTIO_DEV_RUNNING to the device flags in new_device and + * remove it in destroy_device. + * + */ +struct virtio_net_device_ops { + int (*new_device)(struct virtio_net *); /**< Add device. */ + void (*destroy_device)(volatile struct virtio_net *); /**< Remove device. */ + + int (*vring_state_changed)(struct virtio_net *dev, uint16_t queue_id, int enable); /**< triggered when a vring is enabled or disabled */ +}; + +static inline uint16_t __attribute__((always_inline)) +rte_vring_available_entries(struct virtio_net *dev, uint16_t queue_id) +{ + struct vhost_virtqueue *vq = dev->virtqueue[queue_id]; + + if (!vq->enabled) + return 0; + + return *(volatile uint16_t *)&vq->avail->idx - vq->last_used_idx_res; +} + +/** + * Function to convert guest physical addresses to vhost virtual addresses. + * This is used to convert guest virtio buffer addresses. + */ +static inline uint64_t __attribute__((always_inline)) +gpa_to_vva(struct virtio_net *dev, uint64_t guest_pa) +{ + struct virtio_memory_regions *region; + uint32_t regionidx; + uint64_t vhost_va = 0; + + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + region = &dev->mem->regions[regionidx]; + if ((guest_pa >= region->guest_phys_address) && + (guest_pa <= region->guest_phys_address_end)) { + vhost_va = region->address_offset + guest_pa; + break; + } + } + return vhost_va; +} + + +/** + * Disable features in feature_mask. Returns 0 on success. + */ +int rte_vhost_feature_disable(uint64_t feature_mask); + +/** + * Enable features in feature_mask. Returns 0 on success. + */ +int rte_vhost_feature_enable(uint64_t feature_mask); + +/* Returns currently supported vhost features */ +uint64_t rte_vhost_feature_get(void); + +int rte_vhost_enable_guest_notification(struct virtio_net *dev, uint16_t queue_id, int enable); + +/* Register vhost driver. dev_name could be different for multiple instance support. */ +int rte_vhost_driver_register(const char *dev_name); + +/* Unregister vhost driver. This is only meaningful to vhost user. */ +int rte_vhost_driver_unregister(const char *dev_name); + +/* Register callbacks. */ +int rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const); +/* Start vhost driver session blocking loop. */ +int rte_vhost_driver_session_start(void); + +/** + * This function adds buffers to the virtio devices RX virtqueue. Buffers can + * be received from the physical port or from another virtual device. A packet + * count is returned to indicate the number of packets that were succesfully + * added to the RX queue. + * @param dev + * virtio-net device + * @param queue_id + * virtio queue index in mq case + * @param pkts + * array to contain packets to be enqueued + * @param count + * packets num to be enqueued + * @return + * num of packets enqueued + */ +uint16_t rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count); + +/** + * This function gets guest buffers from the virtio device TX virtqueue, + * construct host mbufs, copies guest buffer content to host mbufs and + * store them in pkts to be processed. + * @param dev + * virtio-net device + * @param queue_id + * virtio queue index in mq case + * @param mbuf_pool + * mbuf_pool where host mbuf is allocated. + * @param pkts + * array to contain packets to be dequeued + * @param count + * packets num to be dequeued + * @return + * num of packets dequeued + */ +uint16_t rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count); + +#endif /* _VIRTIO_NET_H_ */ diff --git a/lib/librte_vhost/vhost-net.h b/lib/librte_vhost/vhost-net.h new file mode 100644 index 00000000..f193a1f6 --- /dev/null +++ b/lib/librte_vhost/vhost-net.h @@ -0,0 +1,114 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_CDEV_H_ +#define _VHOST_NET_CDEV_H_ +#include +#include +#include +#include +#include + +#include + +#include "rte_virtio_net.h" + +/* Macros for printing using RTE_LOG */ +#define RTE_LOGTYPE_VHOST_CONFIG RTE_LOGTYPE_USER1 +#define RTE_LOGTYPE_VHOST_DATA RTE_LOGTYPE_USER1 + +#ifdef RTE_LIBRTE_VHOST_DEBUG +#define VHOST_MAX_PRINT_BUFF 6072 +#define LOG_LEVEL RTE_LOG_DEBUG +#define LOG_DEBUG(log_type, fmt, args...) RTE_LOG(DEBUG, log_type, fmt, ##args) +#define PRINT_PACKET(device, addr, size, header) do { \ + char *pkt_addr = (char *)(addr); \ + unsigned int index; \ + char packet[VHOST_MAX_PRINT_BUFF]; \ + \ + if ((header)) \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Header size %d: ", (device->device_fh), (size)); \ + else \ + snprintf(packet, VHOST_MAX_PRINT_BUFF, "(%" PRIu64 ") Packet size %d: ", (device->device_fh), (size)); \ + for (index = 0; index < (size); index++) { \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), \ + "%02hhx ", pkt_addr[index]); \ + } \ + snprintf(packet + strnlen(packet, VHOST_MAX_PRINT_BUFF), VHOST_MAX_PRINT_BUFF - strnlen(packet, VHOST_MAX_PRINT_BUFF), "\n"); \ + \ + LOG_DEBUG(VHOST_DATA, "%s", packet); \ +} while (0) +#else +#define LOG_LEVEL RTE_LOG_INFO +#define LOG_DEBUG(log_type, fmt, args...) do {} while (0) +#define PRINT_PACKET(device, addr, size, header) do {} while (0) +#endif + + +/* + * Structure used to identify device context. + */ +struct vhost_device_ctx { + pid_t pid; /* PID of process calling the IOCTL. */ + uint64_t fh; /* Populated with fi->fh to track the device index. */ +}; + +int vhost_new_device(struct vhost_device_ctx); +void vhost_destroy_device(struct vhost_device_ctx); + +void vhost_set_ifname(struct vhost_device_ctx, + const char *if_name, unsigned int if_len); + +int vhost_get_features(struct vhost_device_ctx, uint64_t *); +int vhost_set_features(struct vhost_device_ctx, uint64_t *); + +int vhost_set_vring_num(struct vhost_device_ctx, struct vhost_vring_state *); +int vhost_set_vring_addr(struct vhost_device_ctx, struct vhost_vring_addr *); +int vhost_set_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); +int vhost_get_vring_base(struct vhost_device_ctx, + uint32_t, struct vhost_vring_state *); + +int vhost_set_vring_kick(struct vhost_device_ctx, struct vhost_vring_file *); +int vhost_set_vring_call(struct vhost_device_ctx, struct vhost_vring_file *); + +int vhost_set_backend(struct vhost_device_ctx, struct vhost_vring_file *); + +int vhost_set_owner(struct vhost_device_ctx); +int vhost_reset_owner(struct vhost_device_ctx); + +/* + * Backend-specific cleanup. Defined by vhost-cuse and vhost-user. + */ +void vhost_backend_cleanup(struct virtio_net *dev); + +#endif /* _VHOST_NET_CDEV_H_ */ diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.c b/lib/librte_vhost/vhost_cuse/eventfd_copy.c new file mode 100644 index 00000000..154b32a4 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.c @@ -0,0 +1,104 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#include "eventfd_link/eventfd_link.h" +#include "eventfd_copy.h" +#include "vhost-net.h" + +static const char eventfd_cdev[] = "/dev/eventfd-link"; + +static int eventfd_link = -1; + +int +eventfd_init(void) +{ + if (eventfd_link >= 0) + return 0; + + eventfd_link = open(eventfd_cdev, O_RDWR); + if (eventfd_link < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "eventfd_link module is not loaded\n"); + return -1; + } + + return 0; +} + +int +eventfd_free(void) +{ + if (eventfd_link >= 0) + close(eventfd_link); + return 0; +} + +/* + * This function uses the eventfd_link kernel module to copy an eventfd file + * descriptor provided by QEMU in to our process space. + */ +int +eventfd_copy(int target_fd, int target_pid) +{ + int ret; + struct eventfd_copy2 eventfd_copy2; + + + /* Open the character device to the kernel module. */ + /* TODO: check this earlier rather than fail until VM boots! */ + if (eventfd_init() < 0) + return -1; + + eventfd_copy2.fd = target_fd; + eventfd_copy2.pid = target_pid; + eventfd_copy2.flags = O_NONBLOCK | O_CLOEXEC; + /* Call the IOCTL to copy the eventfd. */ + ret = ioctl(eventfd_link, EVENTFD_COPY2, &eventfd_copy2); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "EVENTFD_COPY2 ioctl failed\n"); + return -1; + } + + return ret; +} diff --git a/lib/librte_vhost/vhost_cuse/eventfd_copy.h b/lib/librte_vhost/vhost_cuse/eventfd_copy.h new file mode 100644 index 00000000..5f446ca0 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/eventfd_copy.h @@ -0,0 +1,45 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _EVENTFD_H +#define _EVENTFD_H + +int +eventfd_init(void); + +int +eventfd_free(void); + +int +eventfd_copy(int target_fd, int target_pid); + +#endif diff --git a/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c new file mode 100644 index 00000000..c613e68e --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/vhost-net-cdev.c @@ -0,0 +1,426 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "virtio-net-cdev.h" +#include "vhost-net.h" +#include "eventfd_copy.h" + +#define FUSE_OPT_DUMMY "\0\0" +#define FUSE_OPT_FORE "-f\0\0" +#define FUSE_OPT_NOMULTI "-s\0\0" + +static const uint32_t default_major = 231; +static const uint32_t default_minor = 1; +static const char cuse_device_name[] = "/dev/cuse"; +static const char default_cdev[] = "vhost-net"; + +static struct fuse_session *session; + +/* + * Returns vhost_device_ctx from given fuse_req_t. The index is populated later + * when the device is added to the device linked list. + */ +static struct vhost_device_ctx +fuse_req_to_vhost_ctx(fuse_req_t req, struct fuse_file_info *fi) +{ + struct vhost_device_ctx ctx; + struct fuse_ctx const *const req_ctx = fuse_req_ctx(req); + + ctx.pid = req_ctx->pid; + ctx.fh = fi->fh; + + return ctx; +} + +/* + * When the device is created in QEMU it gets initialised here and + * added to the device linked list. + */ +static void +vhost_net_open(fuse_req_t req, struct fuse_file_info *fi) +{ + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + int err = 0; + + err = vhost_new_device(ctx); + if (err == -1) { + fuse_reply_err(req, EPERM); + return; + } + + fi->fh = err; + + RTE_LOG(INFO, VHOST_CONFIG, + "(%"PRIu64") Device configuration started\n", fi->fh); + fuse_reply_open(req, fi); +} + +/* + * When QEMU is shutdown or killed the device gets released. + */ +static void +vhost_net_release(fuse_req_t req, struct fuse_file_info *fi) +{ + int err = 0; + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + + vhost_destroy_device(ctx); + RTE_LOG(INFO, VHOST_CONFIG, "(%"PRIu64") Device released\n", ctx.fh); + fuse_reply_err(req, err); +} + +/* + * Boilerplate code for CUSE IOCTL + * Implicit arguments: ctx, req, result. + */ +#define VHOST_IOCTL(func) do { \ + result = (func)(ctx); \ + fuse_reply_ioctl(req, result, NULL, 0); \ +} while (0) + +/* + * Boilerplate IOCTL RETRY + * Implicit arguments: req. + */ +#define VHOST_IOCTL_RETRY(size_r, size_w) do { \ + struct iovec iov_r = { arg, (size_r) }; \ + struct iovec iov_w = { arg, (size_w) }; \ + fuse_reply_ioctl_retry(req, &iov_r, \ + (size_r) ? 1 : 0, &iov_w, (size_w) ? 1 : 0);\ +} while (0) + +/* + * Boilerplate code for CUSE Read IOCTL + * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + */ +#define VHOST_IOCTL_R(type, var, func) do { \ + if (!in_bufsz) { \ + VHOST_IOCTL_RETRY(sizeof(type), 0);\ + } else { \ + (var) = *(const type*)in_buf; \ + result = func(ctx, &(var)); \ + fuse_reply_ioctl(req, result, NULL, 0);\ + } \ +} while (0) + +/* + * Boilerplate code for CUSE Write IOCTL + * Implicit arguments: ctx, req, result, out_bufsz. + */ +#define VHOST_IOCTL_W(type, var, func) do { \ + if (!out_bufsz) { \ + VHOST_IOCTL_RETRY(0, sizeof(type));\ + } else { \ + result = (func)(ctx, &(var));\ + fuse_reply_ioctl(req, result, &(var), sizeof(type));\ + } \ +} while (0) + +/* + * Boilerplate code for CUSE Read/Write IOCTL + * Implicit arguments: ctx, req, result, in_bufsz, in_buf. + */ +#define VHOST_IOCTL_RW(type1, var1, type2, var2, func) do { \ + if (!in_bufsz) { \ + VHOST_IOCTL_RETRY(sizeof(type1), sizeof(type2));\ + } else { \ + (var1) = *(const type1*) (in_buf); \ + result = (func)(ctx, (var1), &(var2)); \ + fuse_reply_ioctl(req, result, &(var2), sizeof(type2));\ + } \ +} while (0) + +/* + * The IOCTLs are handled using CUSE/FUSE in userspace. Depending on the type + * of IOCTL a buffer is requested to read or to write. This request is handled + * by FUSE and the buffer is then given to CUSE. + */ +static void +vhost_net_ioctl(fuse_req_t req, int cmd, void *arg, + struct fuse_file_info *fi, __rte_unused unsigned flags, + const void *in_buf, size_t in_bufsz, size_t out_bufsz) +{ + struct vhost_device_ctx ctx = fuse_req_to_vhost_ctx(req, fi); + struct vhost_vring_file file; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + uint64_t features; + uint32_t index; + int result = 0; + + switch (cmd) { + case VHOST_NET_SET_BACKEND: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_NET_SET_BACKEND\n", ctx.fh); + if (!in_buf) { + VHOST_IOCTL_RETRY(sizeof(file), 0); + break; + } + file = *(const struct vhost_vring_file *)in_buf; + result = cuse_set_backend(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + break; + + case VHOST_GET_FEATURES: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_GET_FEATURES\n", ctx.fh); + VHOST_IOCTL_W(uint64_t, features, vhost_get_features); + break; + + case VHOST_SET_FEATURES: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_FEATURES\n", ctx.fh); + VHOST_IOCTL_R(uint64_t, features, vhost_set_features); + break; + + case VHOST_RESET_OWNER: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_RESET_OWNER\n", ctx.fh); + VHOST_IOCTL(vhost_reset_owner); + break; + + case VHOST_SET_OWNER: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_OWNER\n", ctx.fh); + VHOST_IOCTL(vhost_set_owner); + break; + + case VHOST_SET_MEM_TABLE: + /*TODO fix race condition.*/ + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_MEM_TABLE\n", ctx.fh); + static struct vhost_memory mem_temp; + + switch (in_bufsz) { + case 0: + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory), 0); + break; + + case sizeof(struct vhost_memory): + mem_temp = *(const struct vhost_memory *) in_buf; + + if (mem_temp.nregions > 0) { + VHOST_IOCTL_RETRY(sizeof(struct vhost_memory) + + (sizeof(struct vhost_memory_region) * + mem_temp.nregions), 0); + } else { + result = -1; + fuse_reply_ioctl(req, result, NULL, 0); + } + break; + + default: + result = cuse_set_mem_table(ctx, in_buf, + mem_temp.nregions); + if (result) + fuse_reply_err(req, EINVAL); + else + fuse_reply_ioctl(req, result, NULL, 0); + } + break; + + case VHOST_SET_VRING_NUM: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_NUM\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_state, state, + vhost_set_vring_num); + break; + + case VHOST_SET_VRING_BASE: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_BASE\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_state, state, + vhost_set_vring_base); + break; + + case VHOST_GET_VRING_BASE: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_GET_VRING_BASE\n", ctx.fh); + VHOST_IOCTL_RW(uint32_t, index, + struct vhost_vring_state, state, vhost_get_vring_base); + break; + + case VHOST_SET_VRING_ADDR: + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_ADDR\n", ctx.fh); + VHOST_IOCTL_R(struct vhost_vring_addr, addr, + vhost_set_vring_addr); + break; + + case VHOST_SET_VRING_KICK: + case VHOST_SET_VRING_CALL: + if (cmd == VHOST_SET_VRING_KICK) + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_KICK\n", + ctx.fh); + else + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: VHOST_SET_VRING_CALL\n", + ctx.fh); + if (!in_buf) + VHOST_IOCTL_RETRY(sizeof(struct vhost_vring_file), 0); + else { + int fd; + file = *(const struct vhost_vring_file *)in_buf; + LOG_DEBUG(VHOST_CONFIG, + "idx:%d fd:%d\n", file.index, file.fd); + fd = eventfd_copy(file.fd, ctx.pid); + if (fd < 0) { + fuse_reply_ioctl(req, -1, NULL, 0); + result = -1; + break; + } + file.fd = fd; + if (cmd == VHOST_SET_VRING_KICK) { + result = vhost_set_vring_kick(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + } else { + result = vhost_set_vring_call(ctx, &file); + fuse_reply_ioctl(req, result, NULL, 0); + } + } + break; + + default: + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") IOCTL: DOESN NOT EXIST\n", ctx.fh); + result = -1; + fuse_reply_ioctl(req, result, NULL, 0); + } + + if (result < 0) + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: FAIL\n", ctx.fh); + else + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") IOCTL: SUCCESS\n", ctx.fh); +} + +/* + * Structure handling open, release and ioctl function pointers is populated. + */ +static const struct cuse_lowlevel_ops vhost_net_ops = { + .open = vhost_net_open, + .release = vhost_net_release, + .ioctl = vhost_net_ioctl, +}; + +/* + * cuse_info is populated and used to register the cuse device. + * vhost_net_device_ops are also passed when the device is registered in app. + */ +int +rte_vhost_driver_register(const char *dev_name) +{ + struct cuse_info cuse_info; + char device_name[PATH_MAX] = ""; + char char_device_name[PATH_MAX] = ""; + const char *device_argv[] = { device_name }; + + char fuse_opt_dummy[] = FUSE_OPT_DUMMY; + char fuse_opt_fore[] = FUSE_OPT_FORE; + char fuse_opt_nomulti[] = FUSE_OPT_NOMULTI; + char *fuse_argv[] = {fuse_opt_dummy, fuse_opt_fore, fuse_opt_nomulti}; + + if (access(cuse_device_name, R_OK | W_OK) < 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "char device %s can't be accessed, maybe not exist\n", + cuse_device_name); + return -1; + } + + if (eventfd_init() < 0) + return -1; + + /* + * The device name is created. This is passed to QEMU so that it can + * register the device with our application. + */ + snprintf(device_name, PATH_MAX, "DEVNAME=%s", dev_name); + snprintf(char_device_name, PATH_MAX, "/dev/%s", dev_name); + + /* Check if device already exists. */ + if (access(char_device_name, F_OK) != -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "char device %s already exists\n", char_device_name); + return -1; + } + + memset(&cuse_info, 0, sizeof(cuse_info)); + cuse_info.dev_major = default_major; + cuse_info.dev_minor = default_minor; + cuse_info.dev_info_argc = 1; + cuse_info.dev_info_argv = device_argv; + cuse_info.flags = CUSE_UNRESTRICTED_IOCTL; + + session = cuse_lowlevel_setup(3, fuse_argv, + &cuse_info, &vhost_net_ops, 0, NULL); + if (session == NULL) + return -1; + + return 0; +} + +/** + * An empty function for unregister + */ +int +rte_vhost_driver_unregister(const char *dev_name __rte_unused) +{ + return 0; +} + +/** + * The CUSE session is launched allowing the application to receive open, + * release and ioctl calls. + */ +int +rte_vhost_driver_session_start(void) +{ + fuse_session_loop(session); + + return 0; +} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c new file mode 100644 index 00000000..a68a8bd4 --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.c @@ -0,0 +1,435 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include "rte_virtio_net.h" +#include "vhost-net.h" +#include "virtio-net-cdev.h" +#include "virtio-net.h" +#include "eventfd_copy.h" + +/* Line size for reading maps file. */ +static const uint32_t BUFSIZE = PATH_MAX; + +/* Size of prot char array in procmap. */ +#define PROT_SZ 5 + +/* Number of elements in procmap struct. */ +#define PROCMAP_SZ 8 + +/* Structure containing information gathered from maps file. */ +struct procmap { + uint64_t va_start; /* Start virtual address in file. */ + uint64_t len; /* Size of file. */ + uint64_t pgoff; /* Not used. */ + uint32_t maj; /* Not used. */ + uint32_t min; /* Not used. */ + uint32_t ino; /* Not used. */ + char prot[PROT_SZ]; /* Not used. */ + char fname[PATH_MAX]; /* File name. */ +}; + +/* + * Locate the file containing QEMU's memory space and + * map it to our address space. + */ +static int +host_memory_map(pid_t pid, uint64_t addr, + uint64_t *mapped_address, uint64_t *mapped_size) +{ + struct dirent *dptr = NULL; + struct procmap procmap; + DIR *dp = NULL; + int fd; + int i; + char memfile[PATH_MAX]; + char mapfile[PATH_MAX]; + char procdir[PATH_MAX]; + char resolved_path[PATH_MAX]; + char *path = NULL; + FILE *fmap; + void *map; + uint8_t found = 0; + char line[BUFSIZE]; + char dlm[] = "- : "; + char *str, *sp, *in[PROCMAP_SZ]; + char *end = NULL; + + /* Path where mem files are located. */ + snprintf(procdir, PATH_MAX, "/proc/%u/fd/", pid); + /* Maps file used to locate mem file. */ + snprintf(mapfile, PATH_MAX, "/proc/%u/maps", pid); + + fmap = fopen(mapfile, "r"); + if (fmap == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to open maps file for pid %d\n", + pid); + return -1; + } + + /* Read through maps file until we find out base_address. */ + while (fgets(line, BUFSIZE, fmap) != 0) { + str = line; + errno = 0; + /* Split line into fields. */ + for (i = 0; i < PROCMAP_SZ; i++) { + in[i] = strtok_r(str, &dlm[i], &sp); + if ((in[i] == NULL) || (errno != 0)) { + fclose(fmap); + return -1; + } + str = NULL; + } + + /* Convert/Copy each field as needed. */ + procmap.va_start = strtoull(in[0], &end, 16); + if ((in[0] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.len = strtoull(in[1], &end, 16); + if ((in[1] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.pgoff = strtoull(in[3], &end, 16); + if ((in[3] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.maj = strtoul(in[4], &end, 16); + if ((in[4] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.min = strtoul(in[5], &end, 16); + if ((in[5] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + procmap.ino = strtoul(in[6], &end, 16); + if ((in[6] == '\0') || (end == NULL) || (*end != '\0') || + (errno != 0)) { + fclose(fmap); + return -1; + } + + memcpy(&procmap.prot, in[2], PROT_SZ); + memcpy(&procmap.fname, in[7], PATH_MAX); + + if (procmap.va_start == addr) { + procmap.len = procmap.len - procmap.va_start; + found = 1; + break; + } + } + fclose(fmap); + + if (!found) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find memory file in pid %d maps file\n", + pid); + return -1; + } + + /* Find the guest memory file among the process fds. */ + dp = opendir(procdir); + if (dp == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Cannot open pid %d process directory\n", + pid); + return -1; + } + + found = 0; + + /* Read the fd directory contents. */ + while (NULL != (dptr = readdir(dp))) { + snprintf(memfile, PATH_MAX, "/proc/%u/fd/%s", + pid, dptr->d_name); + path = realpath(memfile, resolved_path); + if ((path == NULL) && (strlen(resolved_path) == 0)) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to resolve fd directory\n"); + closedir(dp); + return -1; + } + if (strncmp(resolved_path, procmap.fname, + strnlen(procmap.fname, PATH_MAX)) == 0) { + found = 1; + break; + } + } + + closedir(dp); + + if (found == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find memory file for pid %d\n", + pid); + return -1; + } + /* Open the shared memory file and map the memory into this process. */ + fd = open(memfile, O_RDWR); + + if (fd == -1) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to open %s for pid %d\n", + memfile, pid); + return -1; + } + + map = mmap(0, (size_t)procmap.len, PROT_READ|PROT_WRITE, + MAP_POPULATE|MAP_SHARED, fd, 0); + close(fd); + + if (map == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "Error mapping the file %s for pid %d\n", + memfile, pid); + return -1; + } + + /* Store the memory address and size in the device data structure */ + *mapped_address = (uint64_t)(uintptr_t)map; + *mapped_size = procmap.len; + + LOG_DEBUG(VHOST_CONFIG, + "Mem File: %s->%s - Size: %llu - VA: %p\n", + memfile, resolved_path, + (unsigned long long)*mapped_size, map); + + return 0; +} + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions) +{ + uint64_t size = offsetof(struct vhost_memory, regions); + uint32_t idx, valid_regions; + struct virtio_memory_regions *pregion; + struct vhost_memory_region *mem_regions = (void *)(uintptr_t) + ((uint64_t)(uintptr_t)mem_regions_addr + size); + uint64_t base_address = 0, mapped_address, mapped_size; + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + if (dev->mem && dev->mem->mapped_address) { + munmap((void *)(uintptr_t)dev->mem->mapped_address, + (size_t)dev->mem->mapped_size); + free(dev->mem); + dev->mem = NULL; + } + + dev->mem = calloc(1, sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_regions) * nregions); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to allocate memory for dev->mem\n", + dev->device_fh); + return -1; + } + + pregion = &dev->mem->regions[0]; + + for (idx = 0; idx < nregions; idx++) { + pregion[idx].guest_phys_address = + mem_regions[idx].guest_phys_addr; + pregion[idx].guest_phys_address_end = + pregion[idx].guest_phys_address + + mem_regions[idx].memory_size; + pregion[idx].memory_size = + mem_regions[idx].memory_size; + pregion[idx].userspace_address = + mem_regions[idx].userspace_addr; + + LOG_DEBUG(VHOST_CONFIG, + "REGION: %u - GPA: %p - QVA: %p - SIZE (%"PRIu64")\n", + idx, + (void *)(uintptr_t)pregion[idx].guest_phys_address, + (void *)(uintptr_t)pregion[idx].userspace_address, + pregion[idx].memory_size); + + /*set the base address mapping*/ + if (pregion[idx].guest_phys_address == 0x0) { + base_address = + pregion[idx].userspace_address; + /* Map VM memory file */ + if (host_memory_map(ctx.pid, base_address, + &mapped_address, &mapped_size) != 0) { + free(dev->mem); + dev->mem = NULL; + return -1; + } + dev->mem->mapped_address = mapped_address; + dev->mem->base_address = base_address; + dev->mem->mapped_size = mapped_size; + } + } + + /* Check that we have a valid base address. */ + if (base_address == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find base address of qemu memory file.\n"); + free(dev->mem); + dev->mem = NULL; + return -1; + } + + valid_regions = nregions; + for (idx = 0; idx < nregions; idx++) { + if ((pregion[idx].userspace_address < base_address) || + (pregion[idx].userspace_address > + (base_address + mapped_size))) + valid_regions--; + } + + + if (valid_regions != nregions) { + valid_regions = 0; + for (idx = nregions; 0 != idx--; ) { + if ((pregion[idx].userspace_address < base_address) || + (pregion[idx].userspace_address > + (base_address + mapped_size))) { + memmove(&pregion[idx], &pregion[idx + 1], + sizeof(struct virtio_memory_regions) * + valid_regions); + } else + valid_regions++; + } + } + + for (idx = 0; idx < valid_regions; idx++) { + pregion[idx].address_offset = + mapped_address - base_address + + pregion[idx].userspace_address - + pregion[idx].guest_phys_address; + } + dev->mem->nregions = valid_regions; + + return 0; +} + +/* + * Function to get the tap device name from the provided file descriptor and + * save it in the device structure. + */ +static int +get_ifname(struct vhost_device_ctx ctx, struct virtio_net *dev, int tap_fd, int pid) +{ + int fd_tap; + struct ifreq ifr; + uint32_t ifr_size; + int ret; + + fd_tap = eventfd_copy(tap_fd, pid); + if (fd_tap < 0) + return -1; + + ret = ioctl(fd_tap, TUNGETIFF, &ifr); + + if (close(fd_tap) < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") fd close failed\n", + dev->device_fh); + + if (ret >= 0) { + ifr_size = strnlen(ifr.ifr_name, sizeof(ifr.ifr_name)); + vhost_set_ifname(ctx, ifr.ifr_name, ifr_size); + } else + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") TUNGETIFF ioctl failed\n", + dev->device_fh); + + return 0; +} + +int cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + if (!(dev->flags & VIRTIO_DEV_RUNNING) && file->fd != VIRTIO_DEV_STOPPED) + get_ifname(ctx, dev, file->fd, ctx.pid); + + return vhost_set_backend(ctx, file); +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + /* Unmap QEMU memory file if mapped. */ + if (dev->mem) { + munmap((void *)(uintptr_t)dev->mem->mapped_address, + (size_t)dev->mem->mapped_size); + free(dev->mem); + dev->mem = NULL; + } +} diff --git a/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h new file mode 100644 index 00000000..eb6b0bab --- /dev/null +++ b/lib/librte_vhost/vhost_cuse/virtio-net-cdev.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#ifndef _VIRTIO_NET_CDEV_H +#define _VIRTIO_NET_CDEV_H + +#include +#include + +#include "vhost-net.h" + +int +cuse_set_mem_table(struct vhost_device_ctx ctx, + const struct vhost_memory *mem_regions_addr, uint32_t nregions); + +int +cuse_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *); + +#endif diff --git a/lib/librte_vhost/vhost_rxtx.c b/lib/librte_vhost/vhost_rxtx.c new file mode 100644 index 00000000..750821a4 --- /dev/null +++ b/lib/librte_vhost/vhost_rxtx.c @@ -0,0 +1,947 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "vhost-net.h" + +#define MAX_PKT_BURST 32 +#define VHOST_LOG_PAGE 4096 + +static inline void __attribute__((always_inline)) +vhost_log_page(uint8_t *log_base, uint64_t page) +{ + log_base[page / 8] |= 1 << (page % 8); +} + +static inline void __attribute__((always_inline)) +vhost_log_write(struct virtio_net *dev, uint64_t addr, uint64_t len) +{ + uint64_t page; + + if (likely(((dev->features & (1ULL << VHOST_F_LOG_ALL)) == 0) || + !dev->log_base || !len)) + return; + + if (unlikely(dev->log_size <= ((addr + len - 1) / VHOST_LOG_PAGE / 8))) + return; + + /* To make sure guest memory updates are committed before logging */ + rte_smp_wmb(); + + page = addr / VHOST_LOG_PAGE; + while (page * VHOST_LOG_PAGE < addr + len) { + vhost_log_page((uint8_t *)(uintptr_t)dev->log_base, page); + page += 1; + } +} + +static inline void __attribute__((always_inline)) +vhost_log_used_vring(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint64_t offset, uint64_t len) +{ + vhost_log_write(dev, vq->log_guest_addr + offset, len); +} + +static bool +is_valid_virt_queue_idx(uint32_t idx, int is_tx, uint32_t qp_nb) +{ + return (is_tx ^ (idx & 1)) == 0 && idx < qp_nb * VIRTIO_QNUM; +} + +static void +virtio_enqueue_offload(struct rte_mbuf *m_buf, struct virtio_net_hdr *net_hdr) +{ + if (m_buf->ol_flags & PKT_TX_L4_MASK) { + net_hdr->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM; + net_hdr->csum_start = m_buf->l2_len + m_buf->l3_len; + + switch (m_buf->ol_flags & PKT_TX_L4_MASK) { + case PKT_TX_TCP_CKSUM: + net_hdr->csum_offset = (offsetof(struct tcp_hdr, + cksum)); + break; + case PKT_TX_UDP_CKSUM: + net_hdr->csum_offset = (offsetof(struct udp_hdr, + dgram_cksum)); + break; + case PKT_TX_SCTP_CKSUM: + net_hdr->csum_offset = (offsetof(struct sctp_hdr, + cksum)); + break; + } + } + + if (m_buf->ol_flags & PKT_TX_TCP_SEG) { + if (m_buf->ol_flags & PKT_TX_IPV4) + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV4; + else + net_hdr->gso_type = VIRTIO_NET_HDR_GSO_TCPV6; + net_hdr->gso_size = m_buf->tso_segsz; + net_hdr->hdr_len = m_buf->l2_len + m_buf->l3_len + + m_buf->l4_len; + } +} + +static inline void +copy_virtio_net_hdr(struct vhost_virtqueue *vq, uint64_t desc_addr, + struct virtio_net_hdr_mrg_rxbuf hdr) +{ + if (vq->vhost_hlen == sizeof(struct virtio_net_hdr_mrg_rxbuf)) + *(struct virtio_net_hdr_mrg_rxbuf *)(uintptr_t)desc_addr = hdr; + else + *(struct virtio_net_hdr *)(uintptr_t)desc_addr = hdr.hdr; +} + +static inline int __attribute__((always_inline)) +copy_mbuf_to_desc(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf *m, uint16_t desc_idx, uint32_t *copied) +{ + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct vring_desc *desc; + uint64_t desc_addr; + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; + + desc = &vq->desc[desc_idx]; + if (unlikely(desc->len < vq->vhost_hlen)) + return -1; + + desc_addr = gpa_to_vva(dev, desc->addr); + rte_prefetch0((void *)(uintptr_t)desc_addr); + + virtio_enqueue_offload(m, &virtio_hdr.hdr); + copy_virtio_net_hdr(vq, desc_addr, virtio_hdr); + vhost_log_write(dev, desc->addr, vq->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0); + + desc_offset = vq->vhost_hlen; + desc_avail = desc->len - vq->vhost_hlen; + + *copied = rte_pktmbuf_pkt_len(m); + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current mbuf, fetch next */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + /* done with current desc buf, fetch next */ + if (desc_avail == 0) { + if ((desc->flags & VRING_DESC_F_NEXT) == 0) { + /* Room in vring buffer is not enough */ + return -1; + } + if (unlikely(desc->next >= vq->size)) + return -1; + + desc = &vq->desc[desc->next]; + desc_addr = gpa_to_vva(dev, desc->addr); + desc_offset = 0; + desc_avail = desc->len; + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, desc->addr + desc_offset, cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + return 0; +} + +/* + * As many data cores may want to access available buffers + * they need to be reserved. + */ +static inline uint32_t +reserve_avail_buf(struct vhost_virtqueue *vq, uint32_t count, + uint16_t *start, uint16_t *end) +{ + uint16_t res_start_idx; + uint16_t res_end_idx; + uint16_t avail_idx; + uint16_t free_entries; + int success; + + count = RTE_MIN(count, (uint32_t)MAX_PKT_BURST); + +again: + res_start_idx = vq->last_used_idx_res; + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + + free_entries = avail_idx - res_start_idx; + count = RTE_MIN(count, free_entries); + if (count == 0) + return 0; + + res_end_idx = res_start_idx + count; + + /* + * update vq->last_used_idx_res atomically; try again if failed. + * + * TODO: Allow to disable cmpset if no concurrency in application. + */ + success = rte_atomic16_cmpset(&vq->last_used_idx_res, + res_start_idx, res_end_idx); + if (unlikely(!success)) + goto again; + + *start = res_start_idx; + *end = res_end_idx; + + return count; +} + +/** + * This function adds buffers to the virtio devices RX virtqueue. Buffers can + * be received from the physical port or from another virtio device. A packet + * count is returned to indicate the number of packets that are succesfully + * added to the RX queue. This function works when the mbuf is scattered, but + * it doesn't support the mergeable feature. + */ +static inline uint32_t __attribute__((always_inline)) +virtio_dev_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + uint16_t res_start_idx, res_end_idx; + uint16_t desc_indexes[MAX_PKT_BURST]; + uint32_t i; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_rx()\n", dev->device_fh); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + count = reserve_avail_buf(vq, count, &res_start_idx, &res_end_idx); + if (count == 0) + return 0; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") res_start_idx %d| res_end_idx Index %d\n", + dev->device_fh, res_start_idx, res_end_idx); + + /* Retrieve all of the desc indexes first to avoid caching issues. */ + rte_prefetch0(&vq->avail->ring[res_start_idx & (vq->size - 1)]); + for (i = 0; i < count; i++) { + desc_indexes[i] = vq->avail->ring[(res_start_idx + i) & + (vq->size - 1)]; + } + + rte_prefetch0(&vq->desc[desc_indexes[0]]); + for (i = 0; i < count; i++) { + uint16_t desc_idx = desc_indexes[i]; + uint16_t used_idx = (res_start_idx + i) & (vq->size - 1); + uint32_t copied; + int err; + + err = copy_mbuf_to_desc(dev, vq, pkts[i], desc_idx, &copied); + + vq->used->ring[used_idx].id = desc_idx; + if (unlikely(err)) + vq->used->ring[used_idx].len = vq->vhost_hlen; + else + vq->used->ring[used_idx].len = copied + vq->vhost_hlen; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + + if (i + 1 < count) + rte_prefetch0(&vq->desc[desc_indexes[i+1]]); + } + + rte_smp_wmb(); + + /* Wait until it's our turn to add our buffer to the used ring. */ + while (unlikely(vq->last_used_idx != res_start_idx)) + rte_pause(); + + *(volatile uint16_t *)&vq->used->idx += count; + vq->last_used_idx = res_end_idx; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + + /* flush used->idx update before we read avail->flags. */ + rte_mb(); + + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + return count; +} + +static inline int +fill_vec_buf(struct vhost_virtqueue *vq, uint32_t avail_idx, + uint32_t *allocated, uint32_t *vec_idx) +{ + uint16_t idx = vq->avail->ring[avail_idx & (vq->size - 1)]; + uint32_t vec_id = *vec_idx; + uint32_t len = *allocated; + + while (1) { + if (unlikely(vec_id >= BUF_VECTOR_MAX || idx >= vq->size)) + return -1; + + len += vq->desc[idx].len; + vq->buf_vec[vec_id].buf_addr = vq->desc[idx].addr; + vq->buf_vec[vec_id].buf_len = vq->desc[idx].len; + vq->buf_vec[vec_id].desc_idx = idx; + vec_id++; + + if ((vq->desc[idx].flags & VRING_DESC_F_NEXT) == 0) + break; + + idx = vq->desc[idx].next; + } + + *allocated = len; + *vec_idx = vec_id; + + return 0; +} + +/* + * As many data cores may want to access available buffers concurrently, + * they need to be reserved. + * + * Returns -1 on fail, 0 on success + */ +static inline int +reserve_avail_buf_mergeable(struct vhost_virtqueue *vq, uint32_t size, + uint16_t *start, uint16_t *end) +{ + uint16_t res_start_idx; + uint16_t res_cur_idx; + uint16_t avail_idx; + uint32_t allocated; + uint32_t vec_idx; + uint16_t tries; + +again: + res_start_idx = vq->last_used_idx_res; + res_cur_idx = res_start_idx; + + allocated = 0; + vec_idx = 0; + tries = 0; + while (1) { + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + if (unlikely(res_cur_idx == avail_idx)) + return -1; + + if (unlikely(fill_vec_buf(vq, res_cur_idx, &allocated, + &vec_idx) < 0)) + return -1; + + res_cur_idx++; + tries++; + + if (allocated >= size) + break; + + /* + * if we tried all available ring items, and still + * can't get enough buf, it means something abnormal + * happened. + */ + if (unlikely(tries >= vq->size)) + return -1; + } + + /* + * update vq->last_used_idx_res atomically. + * retry again if failed. + */ + if (rte_atomic16_cmpset(&vq->last_used_idx_res, + res_start_idx, res_cur_idx) == 0) + goto again; + + *start = res_start_idx; + *end = res_cur_idx; + return 0; +} + +static inline uint32_t __attribute__((always_inline)) +copy_mbuf_to_desc_mergeable(struct virtio_net *dev, struct vhost_virtqueue *vq, + uint16_t res_start_idx, uint16_t res_end_idx, + struct rte_mbuf *m) +{ + struct virtio_net_hdr_mrg_rxbuf virtio_hdr = {{0, 0, 0, 0, 0, 0}, 0}; + uint32_t vec_idx = 0; + uint16_t cur_idx = res_start_idx; + uint64_t desc_addr; + uint32_t mbuf_offset, mbuf_avail; + uint32_t desc_offset, desc_avail; + uint32_t cpy_len; + uint16_t desc_idx, used_idx; + + if (unlikely(m == NULL)) + return 0; + + LOG_DEBUG(VHOST_DATA, + "(%"PRIu64") Current Index %d| End Index %d\n", + dev->device_fh, cur_idx, res_end_idx); + + if (vq->buf_vec[vec_idx].buf_len < vq->vhost_hlen) + return -1; + + desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + rte_prefetch0((void *)(uintptr_t)desc_addr); + + virtio_hdr.num_buffers = res_end_idx - res_start_idx; + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") RX: Num merge buffers %d\n", + dev->device_fh, virtio_hdr.num_buffers); + + virtio_enqueue_offload(m, &virtio_hdr.hdr); + copy_virtio_net_hdr(vq, desc_addr, virtio_hdr); + vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr, vq->vhost_hlen); + PRINT_PACKET(dev, (uintptr_t)desc_addr, vq->vhost_hlen, 0); + + desc_avail = vq->buf_vec[vec_idx].buf_len - vq->vhost_hlen; + desc_offset = vq->vhost_hlen; + + mbuf_avail = rte_pktmbuf_data_len(m); + mbuf_offset = 0; + while (mbuf_avail != 0 || m->next != NULL) { + /* done with current desc buf, get the next one */ + if (desc_avail == 0) { + desc_idx = vq->buf_vec[vec_idx].desc_idx; + + if (!(vq->desc[desc_idx].flags & VRING_DESC_F_NEXT)) { + /* Update used ring with desc information */ + used_idx = cur_idx++ & (vq->size - 1); + vq->used->ring[used_idx].id = desc_idx; + vq->used->ring[used_idx].len = desc_offset; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, + ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } + + vec_idx++; + desc_addr = gpa_to_vva(dev, vq->buf_vec[vec_idx].buf_addr); + + /* Prefetch buffer address. */ + rte_prefetch0((void *)(uintptr_t)desc_addr); + desc_offset = 0; + desc_avail = vq->buf_vec[vec_idx].buf_len; + } + + /* done with current mbuf, get the next one */ + if (mbuf_avail == 0) { + m = m->next; + + mbuf_offset = 0; + mbuf_avail = rte_pktmbuf_data_len(m); + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy((void *)((uintptr_t)(desc_addr + desc_offset)), + rte_pktmbuf_mtod_offset(m, void *, mbuf_offset), + cpy_len); + vhost_log_write(dev, vq->buf_vec[vec_idx].buf_addr + desc_offset, + cpy_len); + PRINT_PACKET(dev, (uintptr_t)(desc_addr + desc_offset), + cpy_len, 0); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + used_idx = cur_idx & (vq->size - 1); + vq->used->ring[used_idx].id = vq->buf_vec[vec_idx].desc_idx; + vq->used->ring[used_idx].len = desc_offset; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + + return res_end_idx - res_start_idx; +} + +static inline uint32_t __attribute__((always_inline)) +virtio_dev_merge_rx(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint32_t count) +{ + struct vhost_virtqueue *vq; + uint32_t pkt_idx = 0, nr_used = 0; + uint16_t start, end; + + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") virtio_dev_merge_rx()\n", + dev->device_fh); + if (unlikely(!is_valid_virt_queue_idx(queue_id, 0, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + count = RTE_MIN((uint32_t)MAX_PKT_BURST, count); + if (count == 0) + return 0; + + for (pkt_idx = 0; pkt_idx < count; pkt_idx++) { + uint32_t pkt_len = pkts[pkt_idx]->pkt_len + vq->vhost_hlen; + + if (unlikely(reserve_avail_buf_mergeable(vq, pkt_len, + &start, &end) < 0)) { + LOG_DEBUG(VHOST_DATA, + "(%" PRIu64 ") Failed to get enough desc from vring\n", + dev->device_fh); + break; + } + + nr_used = copy_mbuf_to_desc_mergeable(dev, vq, start, end, + pkts[pkt_idx]); + rte_smp_wmb(); + + /* + * Wait until it's our turn to add our buffer + * to the used ring. + */ + while (unlikely(vq->last_used_idx != start)) + rte_pause(); + + *(volatile uint16_t *)&vq->used->idx += nr_used; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + vq->last_used_idx = end; + } + + if (likely(pkt_idx)) { + /* flush used->idx update before we read avail->flags. */ + rte_mb(); + + /* Kick the guest if necessary. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + } + + return pkt_idx; +} + +uint16_t +rte_vhost_enqueue_burst(struct virtio_net *dev, uint16_t queue_id, + struct rte_mbuf **pkts, uint16_t count) +{ + if (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) + return virtio_dev_merge_rx(dev, queue_id, pkts, count); + else + return virtio_dev_rx(dev, queue_id, pkts, count); +} + +static void +parse_ethernet(struct rte_mbuf *m, uint16_t *l4_proto, void **l4_hdr) +{ + struct ipv4_hdr *ipv4_hdr; + struct ipv6_hdr *ipv6_hdr; + void *l3_hdr = NULL; + struct ether_hdr *eth_hdr; + uint16_t ethertype; + + eth_hdr = rte_pktmbuf_mtod(m, struct ether_hdr *); + + m->l2_len = sizeof(struct ether_hdr); + ethertype = rte_be_to_cpu_16(eth_hdr->ether_type); + + if (ethertype == ETHER_TYPE_VLAN) { + struct vlan_hdr *vlan_hdr = (struct vlan_hdr *)(eth_hdr + 1); + + m->l2_len += sizeof(struct vlan_hdr); + ethertype = rte_be_to_cpu_16(vlan_hdr->eth_proto); + } + + l3_hdr = (char *)eth_hdr + m->l2_len; + + switch (ethertype) { + case ETHER_TYPE_IPv4: + ipv4_hdr = (struct ipv4_hdr *)l3_hdr; + *l4_proto = ipv4_hdr->next_proto_id; + m->l3_len = (ipv4_hdr->version_ihl & 0x0f) * 4; + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV4; + break; + case ETHER_TYPE_IPv6: + ipv6_hdr = (struct ipv6_hdr *)l3_hdr; + *l4_proto = ipv6_hdr->proto; + m->l3_len = sizeof(struct ipv6_hdr); + *l4_hdr = (char *)l3_hdr + m->l3_len; + m->ol_flags |= PKT_TX_IPV6; + break; + default: + m->l3_len = 0; + *l4_proto = 0; + break; + } +} + +static inline void __attribute__((always_inline)) +vhost_dequeue_offload(struct virtio_net_hdr *hdr, struct rte_mbuf *m) +{ + uint16_t l4_proto = 0; + void *l4_hdr = NULL; + struct tcp_hdr *tcp_hdr = NULL; + + parse_ethernet(m, &l4_proto, &l4_hdr); + if (hdr->flags == VIRTIO_NET_HDR_F_NEEDS_CSUM) { + if (hdr->csum_start == (m->l2_len + m->l3_len)) { + switch (hdr->csum_offset) { + case (offsetof(struct tcp_hdr, cksum)): + if (l4_proto == IPPROTO_TCP) + m->ol_flags |= PKT_TX_TCP_CKSUM; + break; + case (offsetof(struct udp_hdr, dgram_cksum)): + if (l4_proto == IPPROTO_UDP) + m->ol_flags |= PKT_TX_UDP_CKSUM; + break; + case (offsetof(struct sctp_hdr, cksum)): + if (l4_proto == IPPROTO_SCTP) + m->ol_flags |= PKT_TX_SCTP_CKSUM; + break; + default: + break; + } + } + } + + if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { + switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { + case VIRTIO_NET_HDR_GSO_TCPV4: + case VIRTIO_NET_HDR_GSO_TCPV6: + tcp_hdr = (struct tcp_hdr *)l4_hdr; + m->ol_flags |= PKT_TX_TCP_SEG; + m->tso_segsz = hdr->gso_size; + m->l4_len = (tcp_hdr->data_off & 0xf0) >> 2; + break; + default: + RTE_LOG(WARNING, VHOST_DATA, + "unsupported gso type %u.\n", hdr->gso_type); + break; + } + } +} + +#define RARP_PKT_SIZE 64 + +static int +make_rarp_packet(struct rte_mbuf *rarp_mbuf, const struct ether_addr *mac) +{ + struct ether_hdr *eth_hdr; + struct arp_hdr *rarp; + + if (rarp_mbuf->buf_len < 64) { + RTE_LOG(WARNING, VHOST_DATA, + "failed to make RARP; mbuf size too small %u (< %d)\n", + rarp_mbuf->buf_len, RARP_PKT_SIZE); + return -1; + } + + /* Ethernet header. */ + eth_hdr = rte_pktmbuf_mtod_offset(rarp_mbuf, struct ether_hdr *, 0); + memset(eth_hdr->d_addr.addr_bytes, 0xff, ETHER_ADDR_LEN); + ether_addr_copy(mac, ð_hdr->s_addr); + eth_hdr->ether_type = htons(ETHER_TYPE_RARP); + + /* RARP header. */ + rarp = (struct arp_hdr *)(eth_hdr + 1); + rarp->arp_hrd = htons(ARP_HRD_ETHER); + rarp->arp_pro = htons(ETHER_TYPE_IPv4); + rarp->arp_hln = ETHER_ADDR_LEN; + rarp->arp_pln = 4; + rarp->arp_op = htons(ARP_OP_REVREQUEST); + + ether_addr_copy(mac, &rarp->arp_data.arp_sha); + ether_addr_copy(mac, &rarp->arp_data.arp_tha); + memset(&rarp->arp_data.arp_sip, 0x00, 4); + memset(&rarp->arp_data.arp_tip, 0x00, 4); + + rarp_mbuf->pkt_len = rarp_mbuf->data_len = RARP_PKT_SIZE; + + return 0; +} + +static inline int __attribute__((always_inline)) +copy_desc_to_mbuf(struct virtio_net *dev, struct vhost_virtqueue *vq, + struct rte_mbuf *m, uint16_t desc_idx, + struct rte_mempool *mbuf_pool) +{ + struct vring_desc *desc; + uint64_t desc_addr; + uint32_t desc_avail, desc_offset; + uint32_t mbuf_avail, mbuf_offset; + uint32_t cpy_len; + struct rte_mbuf *cur = m, *prev = m; + struct virtio_net_hdr *hdr; + /* A counter to avoid desc dead loop chain */ + uint32_t nr_desc = 1; + + desc = &vq->desc[desc_idx]; + if (unlikely(desc->len < vq->vhost_hlen)) + return -1; + + desc_addr = gpa_to_vva(dev, desc->addr); + rte_prefetch0((void *)(uintptr_t)desc_addr); + + /* Retrieve virtio net header */ + hdr = (struct virtio_net_hdr *)((uintptr_t)desc_addr); + desc_avail = desc->len - vq->vhost_hlen; + desc_offset = vq->vhost_hlen; + + mbuf_offset = 0; + mbuf_avail = m->buf_len - RTE_PKTMBUF_HEADROOM; + while (desc_avail != 0 || (desc->flags & VRING_DESC_F_NEXT) != 0) { + /* This desc reaches to its end, get the next one */ + if (desc_avail == 0) { + if (unlikely(desc->next >= vq->size || + ++nr_desc >= vq->size)) + return -1; + desc = &vq->desc[desc->next]; + + desc_addr = gpa_to_vva(dev, desc->addr); + rte_prefetch0((void *)(uintptr_t)desc_addr); + + desc_offset = 0; + desc_avail = desc->len; + + PRINT_PACKET(dev, (uintptr_t)desc_addr, desc->len, 0); + } + + /* + * This mbuf reaches to its end, get a new one + * to hold more data. + */ + if (mbuf_avail == 0) { + cur = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(cur == NULL)) { + RTE_LOG(ERR, VHOST_DATA, "Failed to " + "allocate memory for mbuf.\n"); + return -1; + } + + prev->next = cur; + prev->data_len = mbuf_offset; + m->nb_segs += 1; + m->pkt_len += mbuf_offset; + prev = cur; + + mbuf_offset = 0; + mbuf_avail = cur->buf_len - RTE_PKTMBUF_HEADROOM; + } + + cpy_len = RTE_MIN(desc_avail, mbuf_avail); + rte_memcpy(rte_pktmbuf_mtod_offset(cur, void *, mbuf_offset), + (void *)((uintptr_t)(desc_addr + desc_offset)), + cpy_len); + + mbuf_avail -= cpy_len; + mbuf_offset += cpy_len; + desc_avail -= cpy_len; + desc_offset += cpy_len; + } + + prev->data_len = mbuf_offset; + m->pkt_len += mbuf_offset; + + if (hdr->flags != 0 || hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) + vhost_dequeue_offload(hdr, m); + + return 0; +} + +uint16_t +rte_vhost_dequeue_burst(struct virtio_net *dev, uint16_t queue_id, + struct rte_mempool *mbuf_pool, struct rte_mbuf **pkts, uint16_t count) +{ + struct rte_mbuf *rarp_mbuf = NULL; + struct vhost_virtqueue *vq; + uint32_t desc_indexes[MAX_PKT_BURST]; + uint32_t used_idx; + uint32_t i = 0; + uint16_t free_entries; + uint16_t avail_idx; + + if (unlikely(!is_valid_virt_queue_idx(queue_id, 1, dev->virt_qp_nb))) { + RTE_LOG(ERR, VHOST_DATA, + "%s (%"PRIu64"): virtqueue idx:%d invalid.\n", + __func__, dev->device_fh, queue_id); + return 0; + } + + vq = dev->virtqueue[queue_id]; + if (unlikely(vq->enabled == 0)) + return 0; + + /* + * Construct a RARP broadcast packet, and inject it to the "pkts" + * array, to looks like that guest actually send such packet. + * + * Check user_send_rarp() for more information. + */ + if (unlikely(rte_atomic16_cmpset((volatile uint16_t *) + &dev->broadcast_rarp.cnt, 1, 0))) { + rarp_mbuf = rte_pktmbuf_alloc(mbuf_pool); + if (rarp_mbuf == NULL) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + return 0; + } + + if (make_rarp_packet(rarp_mbuf, &dev->mac)) { + rte_pktmbuf_free(rarp_mbuf); + rarp_mbuf = NULL; + } else { + count -= 1; + } + } + + avail_idx = *((volatile uint16_t *)&vq->avail->idx); + free_entries = avail_idx - vq->last_used_idx; + if (free_entries == 0) + goto out; + + LOG_DEBUG(VHOST_DATA, "%s (%"PRIu64")\n", __func__, dev->device_fh); + + /* Prefetch available ring to retrieve head indexes. */ + used_idx = vq->last_used_idx & (vq->size - 1); + rte_prefetch0(&vq->avail->ring[used_idx]); + + count = RTE_MIN(count, MAX_PKT_BURST); + count = RTE_MIN(count, free_entries); + LOG_DEBUG(VHOST_DATA, "(%"PRIu64") about to dequeue %u buffers\n", + dev->device_fh, count); + + /* Retrieve all of the head indexes first to avoid caching issues. */ + for (i = 0; i < count; i++) { + desc_indexes[i] = vq->avail->ring[(vq->last_used_idx + i) & + (vq->size - 1)]; + } + + /* Prefetch descriptor index. */ + rte_prefetch0(&vq->desc[desc_indexes[0]]); + rte_prefetch0(&vq->used->ring[vq->last_used_idx & (vq->size - 1)]); + + for (i = 0; i < count; i++) { + int err; + + if (likely(i + 1 < count)) { + rte_prefetch0(&vq->desc[desc_indexes[i + 1]]); + rte_prefetch0(&vq->used->ring[(used_idx + 1) & + (vq->size - 1)]); + } + + pkts[i] = rte_pktmbuf_alloc(mbuf_pool); + if (unlikely(pkts[i] == NULL)) { + RTE_LOG(ERR, VHOST_DATA, + "Failed to allocate memory for mbuf.\n"); + break; + } + err = copy_desc_to_mbuf(dev, vq, pkts[i], desc_indexes[i], + mbuf_pool); + if (unlikely(err)) { + rte_pktmbuf_free(pkts[i]); + break; + } + + used_idx = vq->last_used_idx++ & (vq->size - 1); + vq->used->ring[used_idx].id = desc_indexes[i]; + vq->used->ring[used_idx].len = 0; + vhost_log_used_vring(dev, vq, + offsetof(struct vring_used, ring[used_idx]), + sizeof(vq->used->ring[used_idx])); + } + + rte_smp_wmb(); + rte_smp_rmb(); + vq->used->idx += i; + vhost_log_used_vring(dev, vq, offsetof(struct vring_used, idx), + sizeof(vq->used->idx)); + + /* Kick guest if required. */ + if (!(vq->avail->flags & VRING_AVAIL_F_NO_INTERRUPT) + && (vq->callfd >= 0)) + eventfd_write(vq->callfd, (eventfd_t)1); + +out: + if (unlikely(rarp_mbuf != NULL)) { + /* + * Inject it to the head of "pkts" array, so that switch's mac + * learning table will get updated first. + */ + memmove(&pkts[1], pkts, i * sizeof(struct rte_mbuf *)); + pkts[0] = rarp_mbuf; + i += 1; + } + + return i; +} diff --git a/lib/librte_vhost/vhost_user/fd_man.c b/lib/librte_vhost/vhost_user/fd_man.c new file mode 100644 index 00000000..087aaed6 --- /dev/null +++ b/lib/librte_vhost/vhost_user/fd_man.c @@ -0,0 +1,289 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fd_man.h" + +/** + * Returns the index in the fdset for a given fd. + * If fd is -1, it means to search for a free entry. + * @return + * index for the fd, or -1 if fd isn't in the fdset. + */ +static int +fdset_find_fd(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < MAX_FDS && pfdset->fd[i].fd != fd; i++) + ; + + return i == MAX_FDS ? -1 : i; +} + +static int +fdset_find_free_slot(struct fdset *pfdset) +{ + return fdset_find_fd(pfdset, -1); +} + +static void +fdset_add_fd(struct fdset *pfdset, int idx, int fd, + fd_cb rcb, fd_cb wcb, void *dat) +{ + struct fdentry *pfdentry; + + if (pfdset == NULL || idx >= MAX_FDS) + return; + + pfdentry = &pfdset->fd[idx]; + pfdentry->fd = fd; + pfdentry->rcb = rcb; + pfdentry->wcb = wcb; + pfdentry->dat = dat; +} + +/** + * Fill the read/write fd_set with the fds in the fdset. + * @return + * the maximum fds filled in the read/write fd_set. + */ +static int +fdset_fill(fd_set *rfset, fd_set *wfset, struct fdset *pfdset) +{ + struct fdentry *pfdentry; + int i, maxfds = -1; + int num = MAX_FDS; + + if (pfdset == NULL) + return -1; + + for (i = 0; i < num; i++) { + pfdentry = &pfdset->fd[i]; + if (pfdentry->fd != -1) { + int added = 0; + if (pfdentry->rcb && rfset) { + FD_SET(pfdentry->fd, rfset); + added = 1; + } + if (pfdentry->wcb && wfset) { + FD_SET(pfdentry->fd, wfset); + added = 1; + } + if (added) + maxfds = pfdentry->fd < maxfds ? + maxfds : pfdentry->fd; + } + } + return maxfds; +} + +void +fdset_init(struct fdset *pfdset) +{ + int i; + + if (pfdset == NULL) + return; + + for (i = 0; i < MAX_FDS; i++) + pfdset->fd[i].fd = -1; + pfdset->num = 0; +} + +/** + * Register the fd in the fdset with read/write handler and context. + */ +int +fdset_add(struct fdset *pfdset, int fd, fd_cb rcb, fd_cb wcb, void *dat) +{ + int i; + + if (pfdset == NULL || fd == -1) + return -1; + + pthread_mutex_lock(&pfdset->fd_mutex); + + /* Find a free slot in the list. */ + i = fdset_find_free_slot(pfdset); + if (i == -1) { + pthread_mutex_unlock(&pfdset->fd_mutex); + return -2; + } + + fdset_add_fd(pfdset, i, fd, rcb, wcb, dat); + pfdset->num++; + + pthread_mutex_unlock(&pfdset->fd_mutex); + + return 0; +} + +/** + * Unregister the fd from the fdset. + */ +void +fdset_del(struct fdset *pfdset, int fd) +{ + int i; + + if (pfdset == NULL || fd == -1) + return; + + do { + pthread_mutex_lock(&pfdset->fd_mutex); + + i = fdset_find_fd(pfdset, fd); + if (i != -1 && pfdset->fd[i].busy == 0) { + /* busy indicates r/wcb is executing! */ + pfdset->fd[i].fd = -1; + pfdset->fd[i].rcb = pfdset->fd[i].wcb = NULL; + pfdset->num--; + i = -1; + } + pthread_mutex_unlock(&pfdset->fd_mutex); + } while (i != -1); +} + +/** + * Unregister the fd at the specified slot from the fdset. + */ +static void +fdset_del_slot(struct fdset *pfdset, int index) +{ + if (pfdset == NULL || index < 0 || index >= MAX_FDS) + return; + + pthread_mutex_lock(&pfdset->fd_mutex); + + pfdset->fd[index].fd = -1; + pfdset->fd[index].rcb = pfdset->fd[index].wcb = NULL; + pfdset->num--; + + pthread_mutex_unlock(&pfdset->fd_mutex); +} + +/** + * This functions runs in infinite blocking loop until there is no fd in + * pfdset. It calls corresponding r/w handler if there is event on the fd. + * + * Before the callback is called, we set the flag to busy status; If other + * thread(now rte_vhost_driver_unregister) calls fdset_del concurrently, it + * will wait until the flag is reset to zero(which indicates the callback is + * finished), then it could free the context after fdset_del. + */ +void +fdset_event_dispatch(struct fdset *pfdset) +{ + fd_set rfds, wfds; + int i, maxfds; + struct fdentry *pfdentry; + int num = MAX_FDS; + fd_cb rcb, wcb; + void *dat; + int fd; + int remove1, remove2; + int ret; + + if (pfdset == NULL) + return; + + while (1) { + struct timeval tv; + tv.tv_sec = 1; + tv.tv_usec = 0; + FD_ZERO(&rfds); + FD_ZERO(&wfds); + pthread_mutex_lock(&pfdset->fd_mutex); + + maxfds = fdset_fill(&rfds, &wfds, pfdset); + + pthread_mutex_unlock(&pfdset->fd_mutex); + + /* + * When select is blocked, other threads might unregister + * listenfds from and register new listenfds into fdset. + * When select returns, the entries for listenfds in the fdset + * might have been updated. It is ok if there is unwanted call + * for new listenfds. + */ + ret = select(maxfds + 1, &rfds, &wfds, NULL, &tv); + if (ret <= 0) + continue; + + for (i = 0; i < num; i++) { + remove1 = remove2 = 0; + pthread_mutex_lock(&pfdset->fd_mutex); + pfdentry = &pfdset->fd[i]; + fd = pfdentry->fd; + rcb = pfdentry->rcb; + wcb = pfdentry->wcb; + dat = pfdentry->dat; + pfdentry->busy = 1; + pthread_mutex_unlock(&pfdset->fd_mutex); + if (fd >= 0 && FD_ISSET(fd, &rfds) && rcb) + rcb(fd, dat, &remove1); + if (fd >= 0 && FD_ISSET(fd, &wfds) && wcb) + wcb(fd, dat, &remove2); + pfdentry->busy = 0; + /* + * fdset_del needs to check busy flag. + * We don't allow fdset_del to be called in callback + * directly. + */ + /* + * When we are to clean up the fd from fdset, + * because the fd is closed in the cb, + * the old fd val could be reused by when creates new + * listen fd in another thread, we couldn't call + * fd_set_del. + */ + if (remove1 || remove2) + fdset_del_slot(pfdset, i); + } + } +} diff --git a/lib/librte_vhost/vhost_user/fd_man.h b/lib/librte_vhost/vhost_user/fd_man.h new file mode 100644 index 00000000..74ecde2c --- /dev/null +++ b/lib/librte_vhost/vhost_user/fd_man.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _FD_MAN_H_ +#define _FD_MAN_H_ +#include +#include + +#define MAX_FDS 1024 + +typedef void (*fd_cb)(int fd, void *dat, int *remove); + +struct fdentry { + int fd; /* -1 indicates this entry is empty */ + fd_cb rcb; /* callback when this fd is readable. */ + fd_cb wcb; /* callback when this fd is writeable.*/ + void *dat; /* fd context */ + int busy; /* whether this entry is being used in cb. */ +}; + +struct fdset { + struct fdentry fd[MAX_FDS]; + pthread_mutex_t fd_mutex; + int num; /* current fd number of this fdset */ +}; + + +void fdset_init(struct fdset *pfdset); + +int fdset_add(struct fdset *pfdset, int fd, + fd_cb rcb, fd_cb wcb, void *dat); + +void fdset_del(struct fdset *pfdset, int fd); + +void fdset_event_dispatch(struct fdset *pfdset); + +#endif diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.c b/lib/librte_vhost/vhost_user/vhost-net-user.c new file mode 100644 index 00000000..df2bd648 --- /dev/null +++ b/lib/librte_vhost/vhost_user/vhost-net-user.c @@ -0,0 +1,531 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "fd_man.h" +#include "vhost-net-user.h" +#include "vhost-net.h" +#include "virtio-net-user.h" + +#define MAX_VIRTIO_BACKLOG 128 + +static void vserver_new_vq_conn(int fd, void *data, int *remove); +static void vserver_message_handler(int fd, void *dat, int *remove); + +struct connfd_ctx { + struct vhost_server *vserver; + uint32_t fh; +}; + +#define MAX_VHOST_SERVER 1024 +struct _vhost_server { + struct vhost_server *server[MAX_VHOST_SERVER]; + struct fdset fdset; + int vserver_cnt; + pthread_mutex_t server_mutex; +}; + +static struct _vhost_server g_vhost_server = { + .fdset = { + .fd = { [0 ... MAX_FDS - 1] = {-1, NULL, NULL, NULL, 0} }, + .fd_mutex = PTHREAD_MUTEX_INITIALIZER, + .num = 0 + }, + .vserver_cnt = 0, + .server_mutex = PTHREAD_MUTEX_INITIALIZER, +}; + +static const char *vhost_message_str[VHOST_USER_MAX] = { + [VHOST_USER_NONE] = "VHOST_USER_NONE", + [VHOST_USER_GET_FEATURES] = "VHOST_USER_GET_FEATURES", + [VHOST_USER_SET_FEATURES] = "VHOST_USER_SET_FEATURES", + [VHOST_USER_SET_OWNER] = "VHOST_USER_SET_OWNER", + [VHOST_USER_RESET_OWNER] = "VHOST_USER_RESET_OWNER", + [VHOST_USER_SET_MEM_TABLE] = "VHOST_USER_SET_MEM_TABLE", + [VHOST_USER_SET_LOG_BASE] = "VHOST_USER_SET_LOG_BASE", + [VHOST_USER_SET_LOG_FD] = "VHOST_USER_SET_LOG_FD", + [VHOST_USER_SET_VRING_NUM] = "VHOST_USER_SET_VRING_NUM", + [VHOST_USER_SET_VRING_ADDR] = "VHOST_USER_SET_VRING_ADDR", + [VHOST_USER_SET_VRING_BASE] = "VHOST_USER_SET_VRING_BASE", + [VHOST_USER_GET_VRING_BASE] = "VHOST_USER_GET_VRING_BASE", + [VHOST_USER_SET_VRING_KICK] = "VHOST_USER_SET_VRING_KICK", + [VHOST_USER_SET_VRING_CALL] = "VHOST_USER_SET_VRING_CALL", + [VHOST_USER_SET_VRING_ERR] = "VHOST_USER_SET_VRING_ERR", + [VHOST_USER_GET_PROTOCOL_FEATURES] = "VHOST_USER_GET_PROTOCOL_FEATURES", + [VHOST_USER_SET_PROTOCOL_FEATURES] = "VHOST_USER_SET_PROTOCOL_FEATURES", + [VHOST_USER_GET_QUEUE_NUM] = "VHOST_USER_GET_QUEUE_NUM", + [VHOST_USER_SET_VRING_ENABLE] = "VHOST_USER_SET_VRING_ENABLE", + [VHOST_USER_SEND_RARP] = "VHOST_USER_SEND_RARP", +}; + +/** + * Create a unix domain socket, bind to path and listen for connection. + * @return + * socket fd or -1 on failure + */ +static int +uds_socket(const char *path) +{ + struct sockaddr_un un; + int sockfd; + int ret; + + if (path == NULL) + return -1; + + sockfd = socket(AF_UNIX, SOCK_STREAM, 0); + if (sockfd < 0) + return -1; + RTE_LOG(INFO, VHOST_CONFIG, "socket created, fd:%d\n", sockfd); + + memset(&un, 0, sizeof(un)); + un.sun_family = AF_UNIX; + snprintf(un.sun_path, sizeof(un.sun_path), "%s", path); + ret = bind(sockfd, (struct sockaddr *)&un, sizeof(un)); + if (ret == -1) { + RTE_LOG(ERR, VHOST_CONFIG, "fail to bind fd:%d, remove file:%s and try again.\n", + sockfd, path); + goto err; + } + RTE_LOG(INFO, VHOST_CONFIG, "bind to %s\n", path); + + ret = listen(sockfd, MAX_VIRTIO_BACKLOG); + if (ret == -1) + goto err; + + return sockfd; + +err: + close(sockfd); + return -1; +} + +/* return bytes# of read on success or negative val on failure. */ +static int +read_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + + ret = recvmsg(sockfd, &msgh, 0); + if (ret <= 0) { + RTE_LOG(ERR, VHOST_CONFIG, "recvmsg failed\n"); + return ret; + } + + if (msgh.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { + RTE_LOG(ERR, VHOST_CONFIG, "truncted msg\n"); + return -1; + } + + for (cmsg = CMSG_FIRSTHDR(&msgh); cmsg != NULL; + cmsg = CMSG_NXTHDR(&msgh, cmsg)) { + if ((cmsg->cmsg_level == SOL_SOCKET) && + (cmsg->cmsg_type == SCM_RIGHTS)) { + memcpy(fds, CMSG_DATA(cmsg), fdsize); + break; + } + } + + return ret; +} + +/* return bytes# of read on success or negative val on failure. */ +static int +read_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + ret = read_fd_message(sockfd, (char *)msg, VHOST_USER_HDR_SIZE, + msg->fds, VHOST_MEMORY_MAX_NREGIONS); + if (ret <= 0) + return ret; + + if (msg && msg->size) { + if (msg->size > sizeof(msg->payload)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid msg size: %d\n", msg->size); + return -1; + } + ret = read(sockfd, &msg->payload, msg->size); + if (ret <= 0) + return ret; + if (ret != (int)msg->size) { + RTE_LOG(ERR, VHOST_CONFIG, + "read control message failed\n"); + return -1; + } + } + + return ret; +} + +static int +send_fd_message(int sockfd, char *buf, int buflen, int *fds, int fd_num) +{ + + struct iovec iov; + struct msghdr msgh; + size_t fdsize = fd_num * sizeof(int); + char control[CMSG_SPACE(fdsize)]; + struct cmsghdr *cmsg; + int ret; + + memset(&msgh, 0, sizeof(msgh)); + iov.iov_base = buf; + iov.iov_len = buflen; + + msgh.msg_iov = &iov; + msgh.msg_iovlen = 1; + + if (fds && fd_num > 0) { + msgh.msg_control = control; + msgh.msg_controllen = sizeof(control); + cmsg = CMSG_FIRSTHDR(&msgh); + cmsg->cmsg_len = CMSG_LEN(fdsize); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), fds, fdsize); + } else { + msgh.msg_control = NULL; + msgh.msg_controllen = 0; + } + + do { + ret = sendmsg(sockfd, &msgh, 0); + } while (ret < 0 && errno == EINTR); + + if (ret < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "sendmsg error\n"); + return ret; + } + + return ret; +} + +static int +send_vhost_message(int sockfd, struct VhostUserMsg *msg) +{ + int ret; + + if (!msg) + return 0; + + msg->flags &= ~VHOST_USER_VERSION_MASK; + msg->flags |= VHOST_USER_VERSION; + msg->flags |= VHOST_USER_REPLY_MASK; + + ret = send_fd_message(sockfd, (char *)msg, + VHOST_USER_HDR_SIZE + msg->size, NULL, 0); + + return ret; +} + +/* call back when there is new virtio connection. */ +static void +vserver_new_vq_conn(int fd, void *dat, __rte_unused int *remove) +{ + struct vhost_server *vserver = (struct vhost_server *)dat; + int conn_fd; + struct connfd_ctx *ctx; + int fh; + struct vhost_device_ctx vdev_ctx = { (pid_t)0, 0 }; + unsigned int size; + + conn_fd = accept(fd, NULL, NULL); + RTE_LOG(INFO, VHOST_CONFIG, + "new virtio connection is %d\n", conn_fd); + if (conn_fd < 0) + return; + + ctx = calloc(1, sizeof(*ctx)); + if (ctx == NULL) { + close(conn_fd); + return; + } + + fh = vhost_new_device(vdev_ctx); + if (fh == -1) { + free(ctx); + close(conn_fd); + return; + } + + vdev_ctx.fh = fh; + size = strnlen(vserver->path, PATH_MAX); + vhost_set_ifname(vdev_ctx, vserver->path, + size); + + RTE_LOG(INFO, VHOST_CONFIG, "new device, handle is %d\n", fh); + + ctx->vserver = vserver; + ctx->fh = fh; + fdset_add(&g_vhost_server.fdset, + conn_fd, vserver_message_handler, NULL, ctx); +} + +/* callback when there is message on the connfd */ +static void +vserver_message_handler(int connfd, void *dat, int *remove) +{ + struct vhost_device_ctx ctx; + struct connfd_ctx *cfd_ctx = (struct connfd_ctx *)dat; + struct VhostUserMsg msg; + uint64_t features; + int ret; + + ctx.fh = cfd_ctx->fh; + ret = read_vhost_message(connfd, &msg); + if (ret <= 0 || msg.request >= VHOST_USER_MAX) { + if (ret < 0) + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read message failed\n"); + else if (ret == 0) + RTE_LOG(INFO, VHOST_CONFIG, + "vhost peer closed\n"); + else + RTE_LOG(ERR, VHOST_CONFIG, + "vhost read incorrect message\n"); + + close(connfd); + *remove = 1; + free(cfd_ctx); + vhost_destroy_device(ctx); + + return; + } + + RTE_LOG(INFO, VHOST_CONFIG, "read message %s\n", + vhost_message_str[msg.request]); + switch (msg.request) { + case VHOST_USER_GET_FEATURES: + ret = vhost_get_features(ctx, &features); + msg.payload.u64 = features; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_FEATURES: + features = msg.payload.u64; + vhost_set_features(ctx, &features); + break; + + case VHOST_USER_GET_PROTOCOL_FEATURES: + msg.payload.u64 = VHOST_USER_PROTOCOL_FEATURES; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_PROTOCOL_FEATURES: + user_set_protocol_features(ctx, msg.payload.u64); + break; + + case VHOST_USER_SET_OWNER: + vhost_set_owner(ctx); + break; + case VHOST_USER_RESET_OWNER: + vhost_reset_owner(ctx); + break; + + case VHOST_USER_SET_MEM_TABLE: + user_set_mem_table(ctx, &msg); + break; + + case VHOST_USER_SET_LOG_BASE: + user_set_log_base(ctx, &msg); + + /* it needs a reply */ + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + case VHOST_USER_SET_LOG_FD: + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented.\n"); + break; + + case VHOST_USER_SET_VRING_NUM: + vhost_set_vring_num(ctx, &msg.payload.state); + break; + case VHOST_USER_SET_VRING_ADDR: + vhost_set_vring_addr(ctx, &msg.payload.addr); + break; + case VHOST_USER_SET_VRING_BASE: + vhost_set_vring_base(ctx, &msg.payload.state); + break; + + case VHOST_USER_GET_VRING_BASE: + ret = user_get_vring_base(ctx, &msg.payload.state); + msg.size = sizeof(msg.payload.state); + send_vhost_message(connfd, &msg); + break; + + case VHOST_USER_SET_VRING_KICK: + user_set_vring_kick(ctx, &msg); + break; + case VHOST_USER_SET_VRING_CALL: + user_set_vring_call(ctx, &msg); + break; + + case VHOST_USER_SET_VRING_ERR: + if (!(msg.payload.u64 & VHOST_USER_VRING_NOFD_MASK)) + close(msg.fds[0]); + RTE_LOG(INFO, VHOST_CONFIG, "not implemented\n"); + break; + + case VHOST_USER_GET_QUEUE_NUM: + msg.payload.u64 = VHOST_MAX_QUEUE_PAIRS; + msg.size = sizeof(msg.payload.u64); + send_vhost_message(connfd, &msg); + break; + + case VHOST_USER_SET_VRING_ENABLE: + user_set_vring_enable(ctx, &msg.payload.state); + break; + case VHOST_USER_SEND_RARP: + user_send_rarp(ctx, &msg); + break; + + default: + break; + + } +} + +/** + * Creates and initialise the vhost server. + */ +int +rte_vhost_driver_register(const char *path) +{ + struct vhost_server *vserver; + + pthread_mutex_lock(&g_vhost_server.server_mutex); + + if (g_vhost_server.vserver_cnt == MAX_VHOST_SERVER) { + RTE_LOG(ERR, VHOST_CONFIG, + "error: the number of servers reaches maximum\n"); + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver = calloc(sizeof(struct vhost_server), 1); + if (vserver == NULL) { + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver->listenfd = uds_socket(path); + if (vserver->listenfd < 0) { + free(vserver); + pthread_mutex_unlock(&g_vhost_server.server_mutex); + return -1; + } + + vserver->path = strdup(path); + + fdset_add(&g_vhost_server.fdset, vserver->listenfd, + vserver_new_vq_conn, NULL, vserver); + + g_vhost_server.server[g_vhost_server.vserver_cnt++] = vserver; + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return 0; +} + + +/** + * Unregister the specified vhost server + */ +int +rte_vhost_driver_unregister(const char *path) +{ + int i; + int count; + + pthread_mutex_lock(&g_vhost_server.server_mutex); + + for (i = 0; i < g_vhost_server.vserver_cnt; i++) { + if (!strcmp(g_vhost_server.server[i]->path, path)) { + fdset_del(&g_vhost_server.fdset, + g_vhost_server.server[i]->listenfd); + + close(g_vhost_server.server[i]->listenfd); + free(g_vhost_server.server[i]->path); + free(g_vhost_server.server[i]); + + unlink(path); + + count = --g_vhost_server.vserver_cnt; + g_vhost_server.server[i] = g_vhost_server.server[count]; + g_vhost_server.server[count] = NULL; + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return 0; + } + } + pthread_mutex_unlock(&g_vhost_server.server_mutex); + + return -1; +} + +int +rte_vhost_driver_session_start(void) +{ + fdset_event_dispatch(&g_vhost_server.fdset); + return 0; +} diff --git a/lib/librte_vhost/vhost_user/vhost-net-user.h b/lib/librte_vhost/vhost_user/vhost-net-user.h new file mode 100644 index 00000000..e3bb4138 --- /dev/null +++ b/lib/librte_vhost/vhost_user/vhost-net-user.h @@ -0,0 +1,117 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VHOST_NET_USER_H +#define _VHOST_NET_USER_H + +#include +#include + +#include "rte_virtio_net.h" +#include "fd_man.h" + +struct vhost_server { + char *path; /**< The path the uds is bind to. */ + int listenfd; /**< The listener sockfd. */ +}; + +/* refer to hw/virtio/vhost-user.c */ + +typedef enum VhostUserRequest { + VHOST_USER_NONE = 0, + VHOST_USER_GET_FEATURES = 1, + VHOST_USER_SET_FEATURES = 2, + VHOST_USER_SET_OWNER = 3, + VHOST_USER_RESET_OWNER = 4, + VHOST_USER_SET_MEM_TABLE = 5, + VHOST_USER_SET_LOG_BASE = 6, + VHOST_USER_SET_LOG_FD = 7, + VHOST_USER_SET_VRING_NUM = 8, + VHOST_USER_SET_VRING_ADDR = 9, + VHOST_USER_SET_VRING_BASE = 10, + VHOST_USER_GET_VRING_BASE = 11, + VHOST_USER_SET_VRING_KICK = 12, + VHOST_USER_SET_VRING_CALL = 13, + VHOST_USER_SET_VRING_ERR = 14, + VHOST_USER_GET_PROTOCOL_FEATURES = 15, + VHOST_USER_SET_PROTOCOL_FEATURES = 16, + VHOST_USER_GET_QUEUE_NUM = 17, + VHOST_USER_SET_VRING_ENABLE = 18, + VHOST_USER_SEND_RARP = 19, + VHOST_USER_MAX +} VhostUserRequest; + +typedef struct VhostUserMemoryRegion { + uint64_t guest_phys_addr; + uint64_t memory_size; + uint64_t userspace_addr; + uint64_t mmap_offset; +} VhostUserMemoryRegion; + +typedef struct VhostUserMemory { + uint32_t nregions; + uint32_t padding; + VhostUserMemoryRegion regions[VHOST_MEMORY_MAX_NREGIONS]; +} VhostUserMemory; + +typedef struct VhostUserLog { + uint64_t mmap_size; + uint64_t mmap_offset; +} VhostUserLog; + +typedef struct VhostUserMsg { + VhostUserRequest request; + +#define VHOST_USER_VERSION_MASK 0x3 +#define VHOST_USER_REPLY_MASK (0x1 << 2) + uint32_t flags; + uint32_t size; /* the following payload size */ + union { +#define VHOST_USER_VRING_IDX_MASK 0xff +#define VHOST_USER_VRING_NOFD_MASK (0x1<<8) + uint64_t u64; + struct vhost_vring_state state; + struct vhost_vring_addr addr; + VhostUserMemory memory; + VhostUserLog log; + } payload; + int fds[VHOST_MEMORY_MAX_NREGIONS]; +} __attribute((packed)) VhostUserMsg; + +#define VHOST_USER_HDR_SIZE offsetof(VhostUserMsg, payload.u64) + +/* The version of the protocol we support */ +#define VHOST_USER_VERSION 0x1 + +/*****************************************************************************/ +#endif diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.c b/lib/librte_vhost/vhost_user/virtio-net-user.c new file mode 100644 index 00000000..f5248bc4 --- /dev/null +++ b/lib/librte_vhost/vhost_user/virtio-net-user.c @@ -0,0 +1,446 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include "virtio-net.h" +#include "virtio-net-user.h" +#include "vhost-net-user.h" +#include "vhost-net.h" + +struct orig_region_map { + int fd; + uint64_t mapped_address; + uint64_t mapped_size; + uint64_t blksz; +}; + +#define orig_region(ptr, nregions) \ + ((struct orig_region_map *)RTE_PTR_ADD((ptr), \ + sizeof(struct virtio_memory) + \ + sizeof(struct virtio_memory_regions) * (nregions))) + +static uint64_t +get_blk_size(int fd) +{ + struct stat stat; + + fstat(fd, &stat); + return (uint64_t)stat.st_blksize; +} + +static void +free_mem_region(struct virtio_net *dev) +{ + struct orig_region_map *region; + unsigned int idx; + + if (!dev || !dev->mem) + return; + + region = orig_region(dev->mem, dev->mem->nregions); + for (idx = 0; idx < dev->mem->nregions; idx++) { + if (region[idx].mapped_address) { + munmap((void *)(uintptr_t)region[idx].mapped_address, + region[idx].mapped_size); + close(region[idx].fd); + } + } +} + +void +vhost_backend_cleanup(struct virtio_net *dev) +{ + if (dev->mem) { + free_mem_region(dev); + free(dev->mem); + dev->mem = NULL; + } +} + +int +user_set_mem_table(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct VhostUserMemory memory = pmsg->payload.memory; + struct virtio_memory_regions *pregion; + uint64_t mapped_address, mapped_size; + struct virtio_net *dev; + unsigned int idx = 0; + struct orig_region_map *pregion_orig; + uint64_t alignment; + + /* unmap old memory regions one by one*/ + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* Remove from the data plane. */ + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + if (dev->mem) { + free_mem_region(dev); + free(dev->mem); + dev->mem = NULL; + } + + dev->mem = calloc(1, + sizeof(struct virtio_memory) + + sizeof(struct virtio_memory_regions) * memory.nregions + + sizeof(struct orig_region_map) * memory.nregions); + if (dev->mem == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to allocate memory for dev->mem\n", + dev->device_fh); + return -1; + } + dev->mem->nregions = memory.nregions; + + pregion_orig = orig_region(dev->mem, memory.nregions); + for (idx = 0; idx < memory.nregions; idx++) { + pregion = &dev->mem->regions[idx]; + pregion->guest_phys_address = + memory.regions[idx].guest_phys_addr; + pregion->guest_phys_address_end = + memory.regions[idx].guest_phys_addr + + memory.regions[idx].memory_size; + pregion->memory_size = + memory.regions[idx].memory_size; + pregion->userspace_address = + memory.regions[idx].userspace_addr; + + /* This is ugly */ + mapped_size = memory.regions[idx].memory_size + + memory.regions[idx].mmap_offset; + + /* mmap() without flag of MAP_ANONYMOUS, should be called + * with length argument aligned with hugepagesz at older + * longterm version Linux, like 2.6.32 and 3.2.72, or + * mmap() will fail with EINVAL. + * + * to avoid failure, make sure in caller to keep length + * aligned. + */ + alignment = get_blk_size(pmsg->fds[idx]); + mapped_size = RTE_ALIGN_CEIL(mapped_size, alignment); + + mapped_address = (uint64_t)(uintptr_t)mmap(NULL, + mapped_size, + PROT_READ | PROT_WRITE, MAP_SHARED, + pmsg->fds[idx], + 0); + + RTE_LOG(INFO, VHOST_CONFIG, + "mapped region %d fd:%d to:%p sz:0x%"PRIx64" " + "off:0x%"PRIx64" align:0x%"PRIx64"\n", + idx, pmsg->fds[idx], (void *)(uintptr_t)mapped_address, + mapped_size, memory.regions[idx].mmap_offset, + alignment); + + if (mapped_address == (uint64_t)(uintptr_t)MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, + "mmap qemu guest failed.\n"); + goto err_mmap; + } + + pregion_orig[idx].mapped_address = mapped_address; + pregion_orig[idx].mapped_size = mapped_size; + pregion_orig[idx].blksz = alignment; + pregion_orig[idx].fd = pmsg->fds[idx]; + + mapped_address += memory.regions[idx].mmap_offset; + + pregion->address_offset = mapped_address - + pregion->guest_phys_address; + + if (memory.regions[idx].guest_phys_addr == 0) { + dev->mem->base_address = + memory.regions[idx].userspace_addr; + dev->mem->mapped_address = + pregion->address_offset; + } + + LOG_DEBUG(VHOST_CONFIG, + "REGION: %u GPA: %p QEMU VA: %p SIZE (%"PRIu64")\n", + idx, + (void *)(uintptr_t)pregion->guest_phys_address, + (void *)(uintptr_t)pregion->userspace_address, + pregion->memory_size); + } + + return 0; + +err_mmap: + while (idx--) { + munmap((void *)(uintptr_t)pregion_orig[idx].mapped_address, + pregion_orig[idx].mapped_size); + close(pregion_orig[idx].fd); + } + free(dev->mem); + dev->mem = NULL; + return -1; +} + +static int +vq_is_ready(struct vhost_virtqueue *vq) +{ + return vq && vq->desc && + vq->kickfd != VIRTIO_UNINITIALIZED_EVENTFD && + vq->callfd != VIRTIO_UNINITIALIZED_EVENTFD; +} + +static int +virtio_is_ready(struct virtio_net *dev) +{ + struct vhost_virtqueue *rvq, *tvq; + uint32_t i; + + for (i = 0; i < dev->virt_qp_nb; i++) { + rvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ]; + tvq = dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ]; + + if (!vq_is_ready(rvq) || !vq_is_ready(tvq)) { + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is not ready for processing.\n"); + return 0; + } + } + + RTE_LOG(INFO, VHOST_CONFIG, + "virtio is now ready for processing.\n"); + return 1; +} + +void +user_set_vring_call(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring call idx:%d file:%d\n", file.index, file.fd); + vhost_set_vring_call(ctx, &file); +} + + +/* + * In vhost-user, when we receive kick message, will test whether virtio + * device is ready for packet processing. + */ +void +user_set_vring_kick(struct vhost_device_ctx ctx, struct VhostUserMsg *pmsg) +{ + struct vhost_vring_file file; + struct virtio_net *dev = get_device(ctx); + + file.index = pmsg->payload.u64 & VHOST_USER_VRING_IDX_MASK; + if (pmsg->payload.u64 & VHOST_USER_VRING_NOFD_MASK) + file.fd = VIRTIO_INVALID_EVENTFD; + else + file.fd = pmsg->fds[0]; + RTE_LOG(INFO, VHOST_CONFIG, + "vring kick idx:%d file:%d\n", file.index, file.fd); + vhost_set_vring_kick(ctx, &file); + + if (virtio_is_ready(dev) && + !(dev->flags & VIRTIO_DEV_RUNNING)) + notify_ops->new_device(dev); +} + +/* + * when virtio is stopped, qemu will send us the GET_VRING_BASE message. + */ +int +user_get_vring_base(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev = get_device(ctx); + + if (dev == NULL) + return -1; + /* We have to stop the queue (virtio) if it is running. */ + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + /* Here we are safe to get the last used index */ + vhost_get_vring_base(ctx, state->index, state); + + RTE_LOG(INFO, VHOST_CONFIG, + "vring base idx:%d file:%d\n", state->index, state->num); + /* + * Based on current qemu vhost-user implementation, this message is + * sent and only sent in vhost_vring_stop. + * TODO: cleanup the vring, it isn't usable since here. + */ + if (dev->virtqueue[state->index]->kickfd >= 0) + close(dev->virtqueue[state->index]->kickfd); + + dev->virtqueue[state->index]->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + + return 0; +} + +/* + * when virtio queues are ready to work, qemu will send us to + * enable the virtio queue pair. + */ +int +user_set_vring_enable(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev = get_device(ctx); + int enable = (int)state->num; + + RTE_LOG(INFO, VHOST_CONFIG, + "set queue enable: %d to qp idx: %d\n", + enable, state->index); + + if (notify_ops->vring_state_changed) { + notify_ops->vring_state_changed(dev, state->index, enable); + } + + dev->virtqueue[state->index]->enabled = enable; + + return 0; +} + +void +user_set_protocol_features(struct vhost_device_ctx ctx, + uint64_t protocol_features) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL || protocol_features & ~VHOST_USER_PROTOCOL_FEATURES) + return; + + dev->protocol_features = protocol_features; +} + +int +user_set_log_base(struct vhost_device_ctx ctx, + struct VhostUserMsg *msg) +{ + struct virtio_net *dev; + int fd = msg->fds[0]; + uint64_t size, off; + void *addr; + + dev = get_device(ctx); + if (!dev) + return -1; + + if (fd < 0) { + RTE_LOG(ERR, VHOST_CONFIG, "invalid log fd: %d\n", fd); + return -1; + } + + if (msg->size != sizeof(VhostUserLog)) { + RTE_LOG(ERR, VHOST_CONFIG, + "invalid log base msg size: %"PRId32" != %d\n", + msg->size, (int)sizeof(VhostUserLog)); + return -1; + } + + size = msg->payload.log.mmap_size; + off = msg->payload.log.mmap_offset; + RTE_LOG(INFO, VHOST_CONFIG, + "log mmap size: %"PRId64", offset: %"PRId64"\n", + size, off); + + /* + * mmap from 0 to workaround a hugepage mmap bug: mmap will + * fail when offset is not page size aligned. + */ + addr = mmap(0, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, VHOST_CONFIG, "mmap log base failed!\n"); + return -1; + } + + /* TODO: unmap on stop */ + dev->log_base = (uint64_t)(uintptr_t)addr + off; + dev->log_size = size; + + return 0; +} + +/* + * An rarp packet is constructed and broadcasted to notify switches about + * the new location of the migrated VM, so that packets from outside will + * not be lost after migration. + * + * However, we don't actually "send" a rarp packet here, instead, we set + * a flag 'broadcast_rarp' to let rte_vhost_dequeue_burst() inject it. + */ +int +user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *msg) +{ + struct virtio_net *dev; + uint8_t *mac = (uint8_t *)&msg->payload.u64; + + dev = get_device(ctx); + if (!dev) + return -1; + + RTE_LOG(DEBUG, VHOST_CONFIG, + ":: mac: %02x:%02x:%02x:%02x:%02x:%02x\n", + mac[0], mac[1], mac[2], mac[3], mac[4], mac[5]); + memcpy(dev->mac.addr_bytes, mac, 6); + + /* + * Set the flag to inject a RARP broadcast packet at + * rte_vhost_dequeue_burst(). + * + * rte_smp_wmb() is for making sure the mac is copied + * before the flag is set. + */ + rte_smp_wmb(); + rte_atomic16_set(&dev->broadcast_rarp, 1); + + return 0; +} diff --git a/lib/librte_vhost/vhost_user/virtio-net-user.h b/lib/librte_vhost/vhost_user/virtio-net-user.h new file mode 100644 index 00000000..cefec162 --- /dev/null +++ b/lib/librte_vhost/vhost_user/virtio-net-user.h @@ -0,0 +1,64 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_NET_USER_H +#define _VIRTIO_NET_USER_H + +#include "vhost-net.h" +#include "vhost-net-user.h" + +#define VHOST_USER_PROTOCOL_F_MQ 0 +#define VHOST_USER_PROTOCOL_F_LOG_SHMFD 1 +#define VHOST_USER_PROTOCOL_F_RARP 2 + +#define VHOST_USER_PROTOCOL_FEATURES ((1ULL << VHOST_USER_PROTOCOL_F_MQ) | \ + (1ULL << VHOST_USER_PROTOCOL_F_LOG_SHMFD) |\ + (1ULL << VHOST_USER_PROTOCOL_F_RARP)) + +int user_set_mem_table(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_vring_call(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_vring_kick(struct vhost_device_ctx, struct VhostUserMsg *); + +void user_set_protocol_features(struct vhost_device_ctx ctx, + uint64_t protocol_features); +int user_set_log_base(struct vhost_device_ctx ctx, struct VhostUserMsg *); +int user_send_rarp(struct vhost_device_ctx ctx, struct VhostUserMsg *); + +int user_get_vring_base(struct vhost_device_ctx, struct vhost_vring_state *); + +int user_set_vring_enable(struct vhost_device_ctx ctx, + struct vhost_vring_state *state); + +#endif diff --git a/lib/librte_vhost/virtio-net.c b/lib/librte_vhost/virtio-net.c new file mode 100644 index 00000000..d870ad97 --- /dev/null +++ b/lib/librte_vhost/virtio-net.c @@ -0,0 +1,772 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef RTE_LIBRTE_VHOST_NUMA +#include +#endif + +#include + +#include +#include +#include +#include +#include +#include + +#include "vhost-net.h" +#include "virtio-net.h" + +#define MAX_VHOST_DEVICE 1024 +static struct virtio_net *vhost_devices[MAX_VHOST_DEVICE]; + +/* device ops to add/remove device to/from data core. */ +struct virtio_net_device_ops const *notify_ops; + +#define VHOST_USER_F_PROTOCOL_FEATURES 30 + +/* Features supported by this lib. */ +#define VHOST_SUPPORTED_FEATURES ((1ULL << VIRTIO_NET_F_MRG_RXBUF) | \ + (1ULL << VIRTIO_NET_F_CTRL_VQ) | \ + (1ULL << VIRTIO_NET_F_CTRL_RX) | \ + (1ULL << VIRTIO_NET_F_GUEST_ANNOUNCE) | \ + (VHOST_SUPPORTS_MQ) | \ + (1ULL << VIRTIO_F_VERSION_1) | \ + (1ULL << VHOST_F_LOG_ALL) | \ + (1ULL << VHOST_USER_F_PROTOCOL_FEATURES) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO4) | \ + (1ULL << VIRTIO_NET_F_HOST_TSO6) | \ + (1ULL << VIRTIO_NET_F_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_CSUM) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO4) | \ + (1ULL << VIRTIO_NET_F_GUEST_TSO6)) + +static uint64_t VHOST_FEATURES = VHOST_SUPPORTED_FEATURES; + + +/* + * Converts QEMU virtual address to Vhost virtual address. This function is + * used to convert the ring addresses to our address space. + */ +static uint64_t +qva_to_vva(struct virtio_net *dev, uint64_t qemu_va) +{ + struct virtio_memory_regions *region; + uint64_t vhost_va = 0; + uint32_t regionidx = 0; + + /* Find the region where the address lives. */ + for (regionidx = 0; regionidx < dev->mem->nregions; regionidx++) { + region = &dev->mem->regions[regionidx]; + if ((qemu_va >= region->userspace_address) && + (qemu_va <= region->userspace_address + + region->memory_size)) { + vhost_va = qemu_va + region->guest_phys_address + + region->address_offset - + region->userspace_address; + break; + } + } + return vhost_va; +} + + +struct virtio_net * +get_device(struct vhost_device_ctx ctx) +{ + struct virtio_net *dev = vhost_devices[ctx.fh]; + + if (unlikely(!dev)) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") device not found.\n", ctx.fh); + } + + return dev; +} + +static void +cleanup_vq(struct vhost_virtqueue *vq, int destroy) +{ + if ((vq->callfd >= 0) && (destroy != 0)) + close(vq->callfd); + if (vq->kickfd >= 0) + close(vq->kickfd); +} + +/* + * Unmap any memory, close any file descriptors and + * free any memory owned by a device. + */ +static void +cleanup_device(struct virtio_net *dev, int destroy) +{ + uint32_t i; + + vhost_backend_cleanup(dev); + + for (i = 0; i < dev->virt_qp_nb; i++) { + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_RXQ], destroy); + cleanup_vq(dev->virtqueue[i * VIRTIO_QNUM + VIRTIO_TXQ], destroy); + } +} + +/* + * Release virtqueues and device memory. + */ +static void +free_device(struct virtio_net *dev) +{ + uint32_t i; + + for (i = 0; i < dev->virt_qp_nb; i++) + rte_free(dev->virtqueue[i * VIRTIO_QNUM]); + + rte_free(dev); +} + +static void +init_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + memset(vq, 0, sizeof(struct vhost_virtqueue)); + + vq->kickfd = VIRTIO_UNINITIALIZED_EVENTFD; + vq->callfd = VIRTIO_UNINITIALIZED_EVENTFD; + + /* Backends are set to -1 indicating an inactive device. */ + vq->backend = -1; + + /* always set the default vq pair to enabled */ + if (qp_idx == 0) + vq->enabled = 1; +} + +static void +init_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + init_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +static void +reset_vring_queue(struct vhost_virtqueue *vq, int qp_idx) +{ + int callfd; + + callfd = vq->callfd; + init_vring_queue(vq, qp_idx); + vq->callfd = callfd; +} + +static void +reset_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + uint32_t base_idx = qp_idx * VIRTIO_QNUM; + + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_RXQ], qp_idx); + reset_vring_queue(dev->virtqueue[base_idx + VIRTIO_TXQ], qp_idx); +} + +static int +alloc_vring_queue_pair(struct virtio_net *dev, uint32_t qp_idx) +{ + struct vhost_virtqueue *virtqueue = NULL; + uint32_t virt_rx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_RXQ; + uint32_t virt_tx_q_idx = qp_idx * VIRTIO_QNUM + VIRTIO_TXQ; + + virtqueue = rte_malloc(NULL, + sizeof(struct vhost_virtqueue) * VIRTIO_QNUM, 0); + if (virtqueue == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to allocate memory for virt qp:%d.\n", qp_idx); + return -1; + } + + dev->virtqueue[virt_rx_q_idx] = virtqueue; + dev->virtqueue[virt_tx_q_idx] = virtqueue + VIRTIO_TXQ; + + init_vring_queue_pair(dev, qp_idx); + + dev->virt_qp_nb += 1; + + return 0; +} + +/* + * Reset some variables in device structure, while keeping few + * others untouched, such as device_fh, ifname, virt_qp_nb: they + * should be same unless the device is removed. + */ +static void +reset_device(struct virtio_net *dev) +{ + uint32_t i; + + dev->features = 0; + dev->protocol_features = 0; + dev->flags = 0; + + for (i = 0; i < dev->virt_qp_nb; i++) + reset_vring_queue_pair(dev, i); +} + +/* + * Function is called from the CUSE open function. The device structure is + * initialised and a new entry is added to the device configuration linked + * list. + */ +int +vhost_new_device(struct vhost_device_ctx ctx) +{ + struct virtio_net *dev; + int i; + + dev = rte_zmalloc(NULL, sizeof(struct virtio_net), 0); + if (dev == NULL) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to allocate memory for dev.\n", + ctx.fh); + return -1; + } + + for (i = 0; i < MAX_VHOST_DEVICE; i++) { + if (vhost_devices[i] == NULL) + break; + } + if (i == MAX_VHOST_DEVICE) { + RTE_LOG(ERR, VHOST_CONFIG, + "Failed to find a free slot for new device.\n"); + return -1; + } + + vhost_devices[i] = dev; + dev->device_fh = i; + + return i; +} + +/* + * Function is called from the CUSE release function. This function will + * cleanup the device and remove it from device configuration linked list. + */ +void +vhost_destroy_device(struct vhost_device_ctx ctx) +{ + struct virtio_net *dev = get_device(ctx); + + if (dev == NULL) + return; + + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + cleanup_device(dev, 1); + free_device(dev); + + vhost_devices[ctx.fh] = NULL; +} + +void +vhost_set_ifname(struct vhost_device_ctx ctx, + const char *if_name, unsigned int if_len) +{ + struct virtio_net *dev; + unsigned int len; + + dev = get_device(ctx); + if (dev == NULL) + return; + + len = if_len > sizeof(dev->ifname) ? + sizeof(dev->ifname) : if_len; + + strncpy(dev->ifname, if_name, len); +} + + +/* + * Called from CUSE IOCTL: VHOST_SET_OWNER + * This function just returns success at the moment unless + * the device hasn't been initialised. + */ +int +vhost_set_owner(struct vhost_device_ctx ctx) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_RESET_OWNER + */ +int +vhost_reset_owner(struct vhost_device_ctx ctx) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + if (dev->flags & VIRTIO_DEV_RUNNING) + notify_ops->destroy_device(dev); + + cleanup_device(dev, 0); + reset_device(dev); + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_GET_FEATURES + * The features that we support are requested. + */ +int +vhost_get_features(struct vhost_device_ctx ctx, uint64_t *pu) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* Send our supported features. */ + *pu = VHOST_FEATURES; + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_SET_FEATURES + * We receive the negotiated features supported by us and the virtio device. + */ +int +vhost_set_features(struct vhost_device_ctx ctx, uint64_t *pu) +{ + struct virtio_net *dev; + uint16_t vhost_hlen; + uint16_t i; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + if (*pu & ~VHOST_FEATURES) + return -1; + + dev->features = *pu; + if (dev->features & + ((1 << VIRTIO_NET_F_MRG_RXBUF) | (1ULL << VIRTIO_F_VERSION_1))) { + vhost_hlen = sizeof(struct virtio_net_hdr_mrg_rxbuf); + } else { + vhost_hlen = sizeof(struct virtio_net_hdr); + } + LOG_DEBUG(VHOST_CONFIG, + "(%"PRIu64") Mergeable RX buffers %s, virtio 1 %s\n", + dev->device_fh, + (dev->features & (1 << VIRTIO_NET_F_MRG_RXBUF)) ? "on" : "off", + (dev->features & (1ULL << VIRTIO_F_VERSION_1)) ? "on" : "off"); + + for (i = 0; i < dev->virt_qp_nb; i++) { + uint16_t base_idx = i * VIRTIO_QNUM; + + dev->virtqueue[base_idx + VIRTIO_RXQ]->vhost_hlen = vhost_hlen; + dev->virtqueue[base_idx + VIRTIO_TXQ]->vhost_hlen = vhost_hlen; + } + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_SET_VRING_NUM + * The virtio device sends us the size of the descriptor ring. + */ +int +vhost_set_vring_num(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* State->index refers to the queue index. The txq is 1, rxq is 0. */ + dev->virtqueue[state->index]->size = state->num; + + return 0; +} + +/* + * Reallocate virtio_dev and vhost_virtqueue data structure to make them on the + * same numa node as the memory of vring descriptor. + */ +#ifdef RTE_LIBRTE_VHOST_NUMA +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index) +{ + int oldnode, newnode; + struct virtio_net *old_dev; + struct vhost_virtqueue *old_vq, *vq; + int ret; + + /* + * vq is allocated on pairs, we should try to do realloc + * on first queue of one queue pair only. + */ + if (index % VIRTIO_QNUM != 0) + return dev; + + old_dev = dev; + vq = old_vq = dev->virtqueue[index]; + + ret = get_mempolicy(&newnode, NULL, 0, old_vq->desc, + MPOL_F_NODE | MPOL_F_ADDR); + + /* check if we need to reallocate vq */ + ret |= get_mempolicy(&oldnode, NULL, 0, old_vq, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get vq numa information.\n"); + return dev; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate vq from %d to %d node\n", oldnode, newnode); + vq = rte_malloc_socket(NULL, sizeof(*vq) * VIRTIO_QNUM, 0, + newnode); + if (!vq) + return dev; + + memcpy(vq, old_vq, sizeof(*vq) * VIRTIO_QNUM); + rte_free(old_vq); + } + + /* check if we need to reallocate dev */ + ret = get_mempolicy(&oldnode, NULL, 0, old_dev, + MPOL_F_NODE | MPOL_F_ADDR); + if (ret) { + RTE_LOG(ERR, VHOST_CONFIG, + "Unable to get dev numa information.\n"); + goto out; + } + if (oldnode != newnode) { + RTE_LOG(INFO, VHOST_CONFIG, + "reallocate dev from %d to %d node\n", + oldnode, newnode); + dev = rte_malloc_socket(NULL, sizeof(*dev), 0, newnode); + if (!dev) { + dev = old_dev; + goto out; + } + + memcpy(dev, old_dev, sizeof(*dev)); + rte_free(old_dev); + } + +out: + dev->virtqueue[index] = vq; + dev->virtqueue[index + 1] = vq + 1; + vhost_devices[dev->device_fh] = dev; + + return dev; +} +#else +static struct virtio_net* +numa_realloc(struct virtio_net *dev, int index __rte_unused) +{ + return dev; +} +#endif + +/* + * Called from CUSE IOCTL: VHOST_SET_VRING_ADDR + * The virtio device sends us the desc, used and avail ring addresses. + * This function then converts these to our address space. + */ +int +vhost_set_vring_addr(struct vhost_device_ctx ctx, struct vhost_vring_addr *addr) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(ctx); + if ((dev == NULL) || (dev->mem == NULL)) + return -1; + + /* addr->index refers to the queue index. The txq 1, rxq is 0. */ + vq = dev->virtqueue[addr->index]; + + /* The addresses are converted from QEMU virtual to Vhost virtual. */ + vq->desc = (struct vring_desc *)(uintptr_t)qva_to_vva(dev, + addr->desc_user_addr); + if (vq->desc == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to find desc ring address.\n", + dev->device_fh); + return -1; + } + + dev = numa_realloc(dev, addr->index); + vq = dev->virtqueue[addr->index]; + + vq->avail = (struct vring_avail *)(uintptr_t)qva_to_vva(dev, + addr->avail_user_addr); + if (vq->avail == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to find avail ring address.\n", + dev->device_fh); + return -1; + } + + vq->used = (struct vring_used *)(uintptr_t)qva_to_vva(dev, + addr->used_user_addr); + if (vq->used == 0) { + RTE_LOG(ERR, VHOST_CONFIG, + "(%"PRIu64") Failed to find used ring address.\n", + dev->device_fh); + return -1; + } + + vq->log_guest_addr = addr->log_guest_addr; + + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address desc: %p\n", + dev->device_fh, vq->desc); + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address avail: %p\n", + dev->device_fh, vq->avail); + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") mapped address used: %p\n", + dev->device_fh, vq->used); + LOG_DEBUG(VHOST_CONFIG, "(%"PRIu64") log_guest_addr: %"PRIx64"\n", + dev->device_fh, vq->log_guest_addr); + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_SET_VRING_BASE + * The virtio device sends us the available ring last used index. + */ +int +vhost_set_vring_base(struct vhost_device_ctx ctx, + struct vhost_vring_state *state) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* State->index refers to the queue index. The txq is 1, rxq is 0. */ + dev->virtqueue[state->index]->last_used_idx = state->num; + dev->virtqueue[state->index]->last_used_idx_res = state->num; + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_GET_VRING_BASE + * We send the virtio device our available ring last used index. + */ +int +vhost_get_vring_base(struct vhost_device_ctx ctx, uint32_t index, + struct vhost_vring_state *state) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + state->index = index; + /* State->index refers to the queue index. The txq is 1, rxq is 0. */ + state->num = dev->virtqueue[state->index]->last_used_idx; + + return 0; +} + + +/* + * Called from CUSE IOCTL: VHOST_SET_VRING_CALL + * The virtio device sends an eventfd to interrupt the guest. This fd gets + * copied into our process space. + */ +int +vhost_set_vring_call(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + uint32_t cur_qp_idx = file->index / VIRTIO_QNUM; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* + * FIXME: VHOST_SET_VRING_CALL is the first per-vring message + * we get, so we do vring queue pair allocation here. + */ + if (cur_qp_idx + 1 > dev->virt_qp_nb) { + if (alloc_vring_queue_pair(dev, cur_qp_idx) < 0) + return -1; + } + + /* file->index refers to the queue index. The txq is 1, rxq is 0. */ + vq = dev->virtqueue[file->index]; + assert(vq != NULL); + + if (vq->callfd >= 0) + close(vq->callfd); + + vq->callfd = file->fd; + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_SET_VRING_KICK + * The virtio device sends an eventfd that it can use to notify us. + * This fd gets copied into our process space. + */ +int +vhost_set_vring_kick(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +{ + struct virtio_net *dev; + struct vhost_virtqueue *vq; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* file->index refers to the queue index. The txq is 1, rxq is 0. */ + vq = dev->virtqueue[file->index]; + + if (vq->kickfd >= 0) + close(vq->kickfd); + + vq->kickfd = file->fd; + + return 0; +} + +/* + * Called from CUSE IOCTL: VHOST_NET_SET_BACKEND + * To complete device initialisation when the virtio driver is loaded, + * we are provided with a valid fd for a tap device (not used by us). + * If this happens then we can add the device to a data core. + * When the virtio driver is removed we get fd=-1. + * At that point we remove the device from the data core. + * The device will still exist in the device configuration linked list. + */ +int +vhost_set_backend(struct vhost_device_ctx ctx, struct vhost_vring_file *file) +{ + struct virtio_net *dev; + + dev = get_device(ctx); + if (dev == NULL) + return -1; + + /* file->index refers to the queue index. The txq is 1, rxq is 0. */ + dev->virtqueue[file->index]->backend = file->fd; + + /* + * If the device isn't already running and both backend fds are set, + * we add the device. + */ + if (!(dev->flags & VIRTIO_DEV_RUNNING)) { + if (((int)dev->virtqueue[VIRTIO_TXQ]->backend != VIRTIO_DEV_STOPPED) && + ((int)dev->virtqueue[VIRTIO_RXQ]->backend != VIRTIO_DEV_STOPPED)) { + return notify_ops->new_device(dev); + } + /* Otherwise we remove it. */ + } else + if (file->fd == VIRTIO_DEV_STOPPED) + notify_ops->destroy_device(dev); + return 0; +} + +int rte_vhost_enable_guest_notification(struct virtio_net *dev, + uint16_t queue_id, int enable) +{ + if (enable) { + RTE_LOG(ERR, VHOST_CONFIG, + "guest notification isn't supported.\n"); + return -1; + } + + dev->virtqueue[queue_id]->used->flags = VRING_USED_F_NO_NOTIFY; + return 0; +} + +uint64_t rte_vhost_feature_get(void) +{ + return VHOST_FEATURES; +} + +int rte_vhost_feature_disable(uint64_t feature_mask) +{ + VHOST_FEATURES = VHOST_FEATURES & ~feature_mask; + return 0; +} + +int rte_vhost_feature_enable(uint64_t feature_mask) +{ + if ((feature_mask & VHOST_SUPPORTED_FEATURES) == feature_mask) { + VHOST_FEATURES = VHOST_FEATURES | feature_mask; + return 0; + } + return -1; +} + +/* + * Register ops so that we can add/remove device to data core. + */ +int +rte_vhost_driver_callback_register(struct virtio_net_device_ops const * const ops) +{ + notify_ops = ops; + + return 0; +} diff --git a/lib/librte_vhost/virtio-net.h b/lib/librte_vhost/virtio-net.h new file mode 100644 index 00000000..75fb57e5 --- /dev/null +++ b/lib/librte_vhost/virtio-net.h @@ -0,0 +1,43 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _VIRTIO_NET_H +#define _VIRTIO_NET_H + +#include "vhost-net.h" +#include "rte_virtio_net.h" + +struct virtio_net_device_ops const *notify_ops; +struct virtio_net *get_device(struct vhost_device_ctx ctx); + +#endif -- cgit 1.2.3-korg